Series comparison

 [PULL 00/20] tcg patch queue
-The following changes since commit e93ded1bf6c94ab95015b33e188bc8b0b0c32670:
+TCG patch queue, plus one target/sh4 patch that
 Yoshinori Sato asked me to process.
-  Merge tag 'testing-pull-request-2022-08-30' of https://gitlab.com/thuth/qemu into staging (2022-08-31 18:19:03 -0400)
 r~
 The following changes since commit efbf38d73e5dcc4d5f8b98c6e7a12be1f3b91745:
   Merge tag 'for-upstream' of git://repo.or.cz/qemu/kevin into staging (2022-10-03 15:06:07 -0400)
 are available in the Git repository at:
-  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220901
+  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20221004
-for you to fetch changes up to 20011be2e30b8aa8ef1fc258485f00c688703deb:
+for you to fetch changes up to ab419fd8a035a65942de4e63effcd55ccbf1a9fe:
-  target/riscv: Make translator stop before the end of a page (2022-09-01 07:43:08 +0100)
+  target/sh4: Fix TB_FLAG_UNALIGN (2022-10-04 12:33:05 -0700)
 ----------------------------------------------------------------
-Respect PROT_EXEC in user-only mode.
+Cache CPUClass for use in hot code paths.
-Fix s390x, i386 and riscv for translations crossing a page.
+Add CPUTLBEntryFull, probe_access_full, tlb_set_page_full.
 Add generic support for TARGET_TB_PCREL.
 tcg/ppc: Optimize 26-bit jumps using STQ for POWER 2.07
 target/sh4: Fix TB_FLAG_UNALIGN
 ----------------------------------------------------------------
-Ilya Leoshkevich (4):
+Alex Bennée (3):
-      linux-user: Clear translations on mprotect()
+      cpu: cache CPUClass in CPUState for hot code paths
-      accel/tcg: Introduce is_same_page()
+      hw/core/cpu-sysemu: used cached class in cpu_asidx_from_attrs
-      target/s390x: Make translator stop before the end of a page
+      cputlb: used cached CPUClass in our hot-paths
-      target/i386: Make translator stop before the end of a page
 Leandro Lupori (1):
       tcg/ppc: Optimize 26-bit jumps
 Richard Henderson (16):
-      linux-user/arm: Mark the commpage executable
+      accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull
-      linux-user/hppa: Allocate page zero as a commpage
+      accel/tcg: Drop addr member from SavedIOTLB
-      linux-user/x86_64: Allocate vsyscall page as a commpage
+      accel/tcg: Suppress auto-invalidate in probe_access_internal
-      linux-user: Honor PT_GNU_STACK
+      accel/tcg: Introduce probe_access_full
-      tests/tcg/i386: Move smc_code2 to an executable section
+      accel/tcg: Introduce tlb_set_page_full
-      accel/tcg: Properly implement get_page_addr_code for user-only
+      include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA
-      accel/tcg: Unlock mmap_lock after longjmp
+      accel/tcg: Remove PageDesc code_bitmap
-      accel/tcg: Make tb_htable_lookup static
+      accel/tcg: Use bool for page_find_alloc
-      accel/tcg: Move qemu_ram_addr_from_host_nofail to physmem.c
+      accel/tcg: Use DisasContextBase in plugin_gen_tb_start
-      accel/tcg: Use probe_access_internal for softmmu get_page_addr_code_hostp
+      accel/tcg: Do not align tb->page_addr[0]
-      accel/tcg: Document the faulting lookup in tb_lookup_cmp
+      accel/tcg: Inline tb_flush_jmp_cache
-      accel/tcg: Remove translator_ldsw
+      include/hw/core: Create struct CPUJumpCache
-      accel/tcg: Add pc and host_pc params to gen_intermediate_code
+      hw/core: Add CPUClass.get_pc
-      accel/tcg: Add fast path for translator_ld*
+      accel/tcg: Introduce tb_pc and log_pc
-      target/riscv: Add MAX_INSN_LEN and insn_len
+      accel/tcg: Introduce TARGET_TB_PCREL
-      target/riscv: Make translator stop before the end of a page
+      target/sh4: Fix TB_FLAG_UNALIGN
- include/elf.h                     |   1 +
+ accel/tcg/internal.h                    |  10 ++
- include/exec/cpu-common.h         |   1 +
+ accel/tcg/tb-hash.h                     |   1 +
- include/exec/exec-all.h           |  89 ++++++++----------------
+ accel/tcg/tb-jmp-cache.h                |  65 ++++++++
- include/exec/translator.h         |  96 ++++++++++++++++---------
+ include/exec/cpu-common.h               |   1 +
- linux-user/arm/target_cpu.h       |   4 +-
+ include/exec/cpu-defs.h                 |  48 ++++--
- linux-user/qemu.h                 |   1 +
+ include/exec/exec-all.h                 |  75 ++++++++-
- accel/tcg/cpu-exec.c              | 143 ++++++++++++++++++++------------------
+ include/exec/plugin-gen.h               |   7 +-
- accel/tcg/cputlb.c                |  93 +++++++------------------
+ include/hw/core/cpu.h                   |  28 ++--
- accel/tcg/translate-all.c         |  29 ++++----
+ include/qemu/typedefs.h                 |   2 +
- accel/tcg/translator.c            | 135 ++++++++++++++++++++++++++---------
+ include/tcg/tcg.h                       |   2 +-
- accel/tcg/user-exec.c             |  17 ++++-
+ target/sh4/cpu.h                        |  56 ++++---
- linux-user/elfload.c              |  82 ++++++++++++++++++++--
+ accel/stubs/tcg-stub.c                  |   4 +
- linux-user/mmap.c                 |   6 +-
+ accel/tcg/cpu-exec.c                    |  80 +++++-----
- softmmu/physmem.c                 |  12 ++++
+ accel/tcg/cputlb.c                      | 259 ++++++++++++++++++--------------
- target/alpha/translate.c          |   5 +-
+ accel/tcg/plugin-gen.c                  |  22 +--
- target/arm/translate.c            |   5 +-
+ accel/tcg/translate-all.c               | 214 ++++++++++++--------------
- target/avr/translate.c            |   5 +-
+ accel/tcg/translator.c                  |   2 +-
- target/cris/translate.c           |   5 +-
+ cpu.c                                   |   9 +-
- target/hexagon/translate.c        |   6 +-
+ hw/core/cpu-common.c                    |   3 +-
- target/hppa/translate.c           |   5 +-
+ hw/core/cpu-sysemu.c                    |   5 +-
- target/i386/tcg/translate.c       |  71 +++++++++++--------
+ linux-user/sh4/signal.c                 |   6 +-
- target/loongarch/translate.c      |   6 +-
+ plugins/core.c                          |   2 +-
- target/m68k/translate.c           |   5 +-
+ target/alpha/cpu.c                      |   9 ++
- target/microblaze/translate.c     |   5 +-
+ target/arm/cpu.c                        |  17 ++-
- target/mips/tcg/translate.c       |   5 +-
+ target/arm/mte_helper.c                 |  14 +-
- target/nios2/translate.c          |   5 +-
+ target/arm/sve_helper.c                 |   4 +-
- target/openrisc/translate.c       |   6 +-
+ target/arm/translate-a64.c              |   2 +-
- target/ppc/translate.c            |   5 +-
+ target/avr/cpu.c                        |  10 +-
- target/riscv/translate.c          |  32 +++++++--
+ target/cris/cpu.c                       |   8 +
- target/rx/translate.c             |   5 +-
+ target/hexagon/cpu.c                    |  10 +-
- target/s390x/tcg/translate.c      |  20 ++++--
+ target/hppa/cpu.c                       |  12 +-
- target/sh4/translate.c            |   5 +-
+ target/i386/cpu.c                       |   9 ++
- target/sparc/translate.c          |   5 +-
+ target/i386/tcg/tcg-cpu.c               |   2 +-
- target/tricore/translate.c        |   6 +-
+ target/loongarch/cpu.c                  |  11 +-
- target/xtensa/translate.c         |   6 +-
+ target/m68k/cpu.c                       |   8 +
- tests/tcg/i386/test-i386.c        |   2 +-
+ target/microblaze/cpu.c                 |  10 +-
- tests/tcg/riscv64/noexec.c        |  79 +++++++++++++++++++++
+ target/mips/cpu.c                       |   8 +
- tests/tcg/s390x/noexec.c          | 106 ++++++++++++++++++++++++++++
+ target/mips/tcg/exception.c             |   2 +-
- tests/tcg/x86_64/noexec.c         |  75 ++++++++++++++++++++
+ target/mips/tcg/sysemu/special_helper.c |   2 +-
- tests/tcg/multiarch/noexec.c.inc  | 139 ++++++++++++++++++++++++++++++++++++
+ target/nios2/cpu.c                      |   9 ++
- tests/tcg/riscv64/Makefile.target |   1 +
+ target/openrisc/cpu.c                   |  10 +-
- tests/tcg/s390x/Makefile.target   |   1 +
+ target/ppc/cpu_init.c                   |   8 +
- tests/tcg/x86_64/Makefile.target  |   3 +-
+ target/riscv/cpu.c                      |  17 ++-
-files changed, 966 insertions(+), 367 deletions(-)
+ target/rx/cpu.c                         |  10 +-
- create mode 100644 tests/tcg/riscv64/noexec.c
+ target/s390x/cpu.c                      |   8 +
- create mode 100644 tests/tcg/s390x/noexec.c
+ target/s390x/tcg/mem_helper.c           |   4 -
- create mode 100644 tests/tcg/x86_64/noexec.c
+ target/sh4/cpu.c                        |  18 ++-
- create mode 100644 tests/tcg/multiarch/noexec.c.inc
+ target/sh4/helper.c                     |   6 +-
  target/sh4/translate.c                  |  90 +++++------
  target/sparc/cpu.c                      |  10 +-
  target/tricore/cpu.c                    |  11 +-
  target/xtensa/cpu.c                     |   8 +
  tcg/tcg.c                               |   8 +-
  trace/control-target.c                  |   2 +-
  tcg/ppc/tcg-target.c.inc                | 119 +++++++++++----
 files changed, 915 insertions(+), 462 deletions(-)
  create mode 100644 accel/tcg/tb-jmp-cache.h

-[PULL 07/20] accel/tcg: Introduce is_same_page()
+[PULL 01/20] cpu: cache CPUClass in CPUState for hot code paths
-From: Ilya Leoshkevich <iii@linux.ibm.com>
+From: Alex Bennée <alex.bennee@linaro.org>
-Introduce a function that checks whether a given address is on the same
+The class cast checkers are quite expensive and always on (unlike the
-page as where disassembly started. Having it improves readability of
+dynamic case whose checks are gated by CONFIG_QOM_CAST_DEBUG). To
-the following patches.
+avoid the overhead of repeatedly checking something which should never
 change we cache the CPUClass reference for use in the hot code paths.
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
+Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
 Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
 Message-Id: <20220811095534.241224-3-iii@linux.ibm.com>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-[rth: Make the DisasContextBase parameter const.]
+Message-Id: <20220811151413.3350684-3-alex.bennee@linaro.org>
 Signed-off-by: Cédric Le Goater <clg@kaod.org>
 Message-Id: <20220923084803.498337-3-clg@kaod.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/exec/translator.h | 10 ++++++++++
+ include/hw/core/cpu.h | 9 +++++++++
-file changed, 10 insertions(+)
+ cpu.c                 | 9 ++++-----
 files changed, 13 insertions(+), 5 deletions(-)
-diff --git a/include/exec/translator.h b/include/exec/translator.h
+diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
 index XXXXXXX..XXXXXXX 100644
---- a/include/exec/translator.h
+--- a/include/hw/core/cpu.h
-+++ b/include/exec/translator.h
++++ b/include/hw/core/cpu.h
-@@ -XXX,XX +XXX,XX @@ FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
+@@ -XXX,XX +XXX,XX @@ typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
+  */
- #undef GEN_TRANSLATOR_LD
+ #define CPU(obj) ((CPUState *)(obj))
 +/*
-+ * Return whether addr is on the same page as where disassembly started.
++ * The class checkers bring in CPU_GET_CLASS() which is potentially
-+ * Translators can use this to enforce the rule that only single-insn
++ * expensive given the eventual call to
-+ * translation blocks are allowed to cross page boundaries.
++ * object_class_dynamic_cast_assert(). Because of this the CPUState
 + * has a cached value for the class in cs->cc which is set up in
 + * cpu_exec_realizefn() for use in hot code paths.
 + */
-+static inline bool is_same_page(const DisasContextBase *db, target_ulong addr)
+ typedef struct CPUClass CPUClass;
-+{
+ DECLARE_CLASS_CHECKERS(CPUClass, CPU,
-+    return ((addr ^ db->pc_first) & TARGET_PAGE_MASK) == 0;
+                        TYPE_CPU)
-+}
+@@ -XXX,XX +XXX,XX @@ struct qemu_work_item;
-+
+ struct CPUState {
- #endif /* EXEC__TRANSLATOR_H */
+     /*< private >*/
      DeviceState parent_obj;
 +    /* cache to avoid expensive CPU_GET_CLASS */
 +    CPUClass *cc;
      /*< public >*/
      int nr_cores;
 diff --git a/cpu.c b/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/cpu.c
 +++ b/cpu.c
@@ -XXX,XX +XXX,XX @@ const VMStateDescription vmstate_cpu_common = {
  void cpu_exec_realizefn(CPUState *cpu, Error **errp)
  {
 -#ifndef CONFIG_USER_ONLY
 -    CPUClass *cc = CPU_GET_CLASS(cpu);
 -#endif
 +    /* cache the cpu class for the hotpath */
 +    cpu->cc = CPU_GET_CLASS(cpu);
      cpu_list_add(cpu);
      if (!accel_cpu_realizefn(cpu, errp)) {
@@ -XXX,XX +XXX,XX @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
      if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
          vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
      }
 -    if (cc->sysemu_ops->legacy_vmsd != NULL) {
 -        vmstate_register(NULL, cpu->cpu_index, cc->sysemu_ops->legacy_vmsd, cpu);
 +    if (cpu->cc->sysemu_ops->legacy_vmsd != NULL) {
 +        vmstate_register(NULL, cpu->cpu_index, cpu->cc->sysemu_ops->legacy_vmsd, cpu);
      }
  #endif /* CONFIG_USER_ONLY */
  }
 --
 .34.1

-[PULL 05/20] linux-user: Clear translations on mprotect()
+[PULL 02/20] hw/core/cpu-sysemu: used cached class in cpu_asidx_from_attrs
-From: Ilya Leoshkevich <iii@linux.ibm.com>
+From: Alex Bennée <alex.bennee@linaro.org>
-Currently it's possible to execute pages that do not have PAGE_EXEC
+This is a heavily used function so let's avoid the cost of
-if there is an existing translation block. Fix by invalidating TBs
+CPU_GET_CLASS. On the romulus-bmc run it has a modest effect:
 that touch the affected pages.
-Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+  Before: 36.812 s ±  0.506 s
-Message-Id: <20220817150506.592862-2-iii@linux.ibm.com>
+  After:  35.912 s ±  0.168 s
 Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 Message-Id: <20220811151413.3350684-4-alex.bennee@linaro.org>
 Signed-off-by: Cédric Le Goater <clg@kaod.org>
 Message-Id: <20220923084803.498337-4-clg@kaod.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- linux-user/mmap.c | 6 ++++--
+ hw/core/cpu-sysemu.c | 5 ++---
-file changed, 4 insertions(+), 2 deletions(-)
+file changed, 2 insertions(+), 3 deletions(-)
-diff --git a/linux-user/mmap.c b/linux-user/mmap.c
+diff --git a/hw/core/cpu-sysemu.c b/hw/core/cpu-sysemu.c
 index XXXXXXX..XXXXXXX 100644
---- a/linux-user/mmap.c
+--- a/hw/core/cpu-sysemu.c
-+++ b/linux-user/mmap.c
++++ b/hw/core/cpu-sysemu.c
-@@ -XXX,XX +XXX,XX @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
+@@ -XXX,XX +XXX,XX @@ hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr)
-             goto error;
-         }
+ int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs)
  {
 -    CPUClass *cc = CPU_GET_CLASS(cpu);
      int ret = 0;
 -    if (cc->sysemu_ops->asidx_from_attrs) {
 -        ret = cc->sysemu_ops->asidx_from_attrs(cpu, attrs);
 +    if (cpu->cc->sysemu_ops->asidx_from_attrs) {
 +        ret = cpu->cc->sysemu_ops->asidx_from_attrs(cpu, attrs);
          assert(ret < cpu->num_ases && ret >= 0);
      }
-+
-     page_set_flags(start, start + len, page_flags);
--    mmap_unlock();
--    return 0;
-+    tb_invalidate_phys_range(start, start + len);
-+    ret = 0;
-+
- error:
-     mmap_unlock();
      return ret;
 --
 .34.1

-[PULL 11/20] accel/tcg: Move qemu_ram_addr_from_host_nofail to physmem.c
+[PULL 03/20] cputlb: used cached CPUClass in our hot-paths
-The base qemu_ram_addr_from_host function is already in
+From: Alex Bennée <alex.bennee@linaro.org>
 softmmu/physmem.c; move the nofail version to be adjacent.
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
+Before: 35.912 s ±  0.168 s
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
+  After: 35.565 s ±  0.087 s
-Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
 Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 Message-Id: <20220811151413.3350684-5-alex.bennee@linaro.org>
 Signed-off-by: Cédric Le Goater <clg@kaod.org>
 Message-Id: <20220923084803.498337-5-clg@kaod.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/exec/cpu-common.h |  1 +
+ accel/tcg/cputlb.c | 15 ++++++---------
- accel/tcg/cputlb.c        | 12 ------------
+file changed, 6 insertions(+), 9 deletions(-)
  softmmu/physmem.c         | 12 ++++++++++++
 files changed, 13 insertions(+), 12 deletions(-)
-diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
-index XXXXXXX..XXXXXXX 100644
---- a/include/exec/cpu-common.h
-+++ b/include/exec/cpu-common.h
-@@ -XXX,XX +XXX,XX @@ typedef uintptr_t ram_addr_t;
- void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
- /* This should not be used by devices.  */
- ram_addr_t qemu_ram_addr_from_host(void *ptr);
-+ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
- RAMBlock *qemu_ram_block_by_name(const char *name);
- RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
-                                    ram_addr_t *offset);
 diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/cputlb.c
 +++ b/accel/tcg/cputlb.c
 @@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
-                             prot, mmu_idx, size);
+ static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
                       MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
  {
 -    CPUClass *cc = CPU_GET_CLASS(cpu);
      bool ok;
      /*
       * This is not a probe, so only valid return is success; failure
       * should result in exception + longjmp to the cpu loop.
       */
 -    ok = cc->tcg_ops->tlb_fill(cpu, addr, size,
 -                               access_type, mmu_idx, false, retaddr);
 +    ok = cpu->cc->tcg_ops->tlb_fill(cpu, addr, size,
 +                                    access_type, mmu_idx, false, retaddr);
      assert(ok);
  }
--static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
+@@ -XXX,XX +XXX,XX @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
--{
+                                         MMUAccessType access_type,
--    ram_addr_t ram_addr;
+                                         int mmu_idx, uintptr_t retaddr)
  {
 -    CPUClass *cc = CPU_GET_CLASS(cpu);
 -
--    ram_addr = qemu_ram_addr_from_host(ptr);
+-    cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
--    if (ram_addr == RAM_ADDR_INVALID) {
++    cpu->cc->tcg_ops->do_unaligned_access(cpu, addr, access_type,
--        error_report("Bad ram pointer %p", ptr);
++                                          mmu_idx, retaddr);
 -        abort();
 -    }
 -    return ram_addr;
 -}
 -
  /*
   * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
   * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
 diff --git a/softmmu/physmem.c b/softmmu/physmem.c
 index XXXXXXX..XXXXXXX 100644
 --- a/softmmu/physmem.c
 +++ b/softmmu/physmem.c
@@ -XXX,XX +XXX,XX @@ ram_addr_t qemu_ram_addr_from_host(void *ptr)
      return block->offset + offset;
  }
-+ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
+ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
-+{
+@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
-+    ram_addr_t ram_addr;
+     if (!tlb_hit_page(tlb_addr, page_addr)) {
-+
+         if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
-+    ram_addr = qemu_ram_addr_from_host(ptr);
+             CPUState *cs = env_cpu(env);
-+    if (ram_addr == RAM_ADDR_INVALID) {
+-            CPUClass *cc = CPU_GET_CLASS(cs);
-+        error_report("Bad ram pointer %p", ptr);
-+        abort();
+-            if (!cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
-+    }
+-                                       mmu_idx, nonfault, retaddr)) {
-+    return ram_addr;
++            if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
-+}
++                                           mmu_idx, nonfault, retaddr)) {
-+
+                 /* Non-faulting page table read failed.  */
- static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
+                 *phost = NULL;
-                                  MemTxAttrs attrs, void *buf, hwaddr len);
+                 return TLB_INVALID_MASK;
  static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
 --
 .34.1

-[PULL 04/20] linux-user: Honor PT_GNU_STACK
+[PULL 04/20] accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull
-Map the stack executable if required by default or on demand.
+This structure will shortly contain more than just
 data for accessing MMIO.  Rename the 'addr' member
 to 'xlat_section' to more clearly indicate its purpose.
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
 Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/elf.h        |  1 +
+ include/exec/cpu-defs.h    |  22 ++++----
- linux-user/qemu.h    |  1 +
+ accel/tcg/cputlb.c         | 102 +++++++++++++++++++------------------
- linux-user/elfload.c | 19 ++++++++++++++++++-
+ target/arm/mte_helper.c    |  14 ++---
-files changed, 20 insertions(+), 1 deletion(-)
+ target/arm/sve_helper.c    |   4 +-
  target/arm/translate-a64.c |   2 +-
 files changed, 73 insertions(+), 71 deletions(-)
-diff --git a/include/elf.h b/include/elf.h
+diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
 index XXXXXXX..XXXXXXX 100644
---- a/include/elf.h
+--- a/include/exec/cpu-defs.h
-+++ b/include/elf.h
++++ b/include/exec/cpu-defs.h
-@@ -XXX,XX +XXX,XX @@ typedef int64_t  Elf64_Sxword;
+@@ -XXX,XX +XXX,XX @@ typedef uint64_t target_ulong;
- #define PT_LOPROC  0x70000000
+ #  endif
- #define PT_HIPROC  0x7fffffff
+ # endif
-+#define PT_GNU_STACK      (PT_LOOS + 0x474e551)
++/* Minimalized TLB entry for use by TCG fast path. */
- #define PT_GNU_PROPERTY   (PT_LOOS + 0x474e553)
+ typedef struct CPUTLBEntry {
+     /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
- #define PT_MIPS_REGINFO   0x70000000
+        bit TARGET_PAGE_BITS-1..4  : Nonzero for accesses that should not
-diff --git a/linux-user/qemu.h b/linux-user/qemu.h
+@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntry {
  QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
 -/* The IOTLB is not accessed directly inline by generated TCG code,
 - * so the CPUIOTLBEntry layout is not as critical as that of the
 - * CPUTLBEntry. (This is also why we don't want to combine the two
 - * structs into one.)
 +/*
 + * The full TLB entry, which is not accessed by generated TCG code,
 + * so the layout is not as critical as that of CPUTLBEntry. This is
 + * also why we don't want to combine the two structs.
   */
 -typedef struct CPUIOTLBEntry {
 +typedef struct CPUTLBEntryFull {
      /*
 -     * @addr contains:
 +     * @xlat_section contains:
       *  - in the lower TARGET_PAGE_BITS, a physical section number
       *  - with the lower TARGET_PAGE_BITS masked off, an offset which
       *    must be added to the virtual address to obtain:
@@ -XXX,XX +XXX,XX @@ typedef struct CPUIOTLBEntry {
       *       number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
       *     + the offset within the target MemoryRegion (otherwise)
       */
 -    hwaddr addr;
 +    hwaddr xlat_section;
      MemTxAttrs attrs;
 -} CPUIOTLBEntry;
 +} CPUTLBEntryFull;
  /*
   * Data elements that are per MMU mode, minus the bits accessed by
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBDesc {
      size_t vindex;
      /* The tlb victim table, in two parts.  */
      CPUTLBEntry vtable[CPU_VTLB_SIZE];
 -    CPUIOTLBEntry viotlb[CPU_VTLB_SIZE];
 -    /* The iotlb.  */
 -    CPUIOTLBEntry *iotlb;
 +    CPUTLBEntryFull vfulltlb[CPU_VTLB_SIZE];
 +    CPUTLBEntryFull *fulltlb;
  } CPUTLBDesc;
  /*
 diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
 index XXXXXXX..XXXXXXX 100644
---- a/linux-user/qemu.h
+--- a/accel/tcg/cputlb.c
-+++ b/linux-user/qemu.h
++++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ struct image_info {
+@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
-         uint32_t        elf_flags;
+     }
-         int             personality;
-         abi_ulong       alignment;
+     g_free(fast->table);
-+        bool            exec_stack;
+-    g_free(desc->iotlb);
++    g_free(desc->fulltlb);
-         /* Generic semihosting knows about these pointers. */
-         abi_ulong       arg_strings;   /* strings for argv */
+     tlb_window_reset(desc, now, 0);
-diff --git a/linux-user/elfload.c b/linux-user/elfload.c
+     /* desc->n_used_entries is cleared by the caller */
      fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
      fast->table = g_try_new(CPUTLBEntry, new_size);
 -    desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
 +    desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
      /*
       * If the allocations fail, try smaller sizes. We just freed some
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
       * allocations to fail though, so we progressively reduce the allocation
       * size, aborting if we cannot even allocate the smallest TLB we support.
       */
 -    while (fast->table == NULL || desc->iotlb == NULL) {
 +    while (fast->table == NULL || desc->fulltlb == NULL) {
          if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
              error_report("%s: %s", __func__, strerror(errno));
              abort();
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
          fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
          g_free(fast->table);
 -        g_free(desc->iotlb);
 +        g_free(desc->fulltlb);
          fast->table = g_try_new(CPUTLBEntry, new_size);
 -        desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
 +        desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
      }
  }
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
      desc->n_used_entries = 0;
      fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
      fast->table = g_new(CPUTLBEntry, n_entries);
 -    desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
 +    desc->fulltlb = g_new(CPUTLBEntryFull, n_entries);
      tlb_mmu_flush_locked(desc, fast);
  }
@@ -XXX,XX +XXX,XX @@ void tlb_destroy(CPUState *cpu)
          CPUTLBDescFast *fast = &env_tlb(env)->f[i];
          g_free(fast->table);
 -        g_free(desc->iotlb);
 +        g_free(desc->fulltlb);
      }
  }
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
          /* Evict the old entry into the victim tlb.  */
          copy_tlb_helper_locked(tv, te);
 -        desc->viotlb[vidx] = desc->iotlb[index];
 +        desc->vfulltlb[vidx] = desc->fulltlb[index];
          tlb_n_used_entries_dec(env, mmu_idx);
      }
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
       * subtract here is that of the page base, and not the same as the
       * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
       */
 -    desc->iotlb[index].addr = iotlb - vaddr_page;
 -    desc->iotlb[index].attrs = attrs;
 +    desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
 +    desc->fulltlb[index].attrs = attrs;
      /* Now calculate the new entry */
      tn.addend = addend - vaddr_page;
@@ -XXX,XX +XXX,XX @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
      }
  }
 -static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
 +static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
                           int mmu_idx, target_ulong addr, uintptr_t retaddr,
                           MMUAccessType access_type, MemOp op)
  {
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
      bool locked = false;
      MemTxResult r;
 -    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
 +    section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
      mr = section->mr;
 -    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
 +    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
      cpu->mem_io_pc = retaddr;
      if (!cpu->can_do_io) {
          cpu_io_recompile(cpu, retaddr);
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
          qemu_mutex_lock_iothread();
          locked = true;
      }
 -    r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
 +    r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
      if (r != MEMTX_OK) {
          hwaddr physaddr = mr_offset +
              section->offset_within_address_space -
              section->offset_within_region;
          cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
 -                               mmu_idx, iotlbentry->attrs, r, retaddr);
 +                               mmu_idx, full->attrs, r, retaddr);
      }
      if (locked) {
          qemu_mutex_unlock_iothread();
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
  }
  /*
 - * Save a potentially trashed IOTLB entry for later lookup by plugin.
 - * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
 + * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin.
 + * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
   * because of the side effect of io_writex changing memory layout.
   */
  static void save_iotlb_data(CPUState *cs, hwaddr addr,
@@ -XXX,XX +XXX,XX @@ static void save_iotlb_data(CPUState *cs, hwaddr addr,
  #endif
  }
 -static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
 +static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
                        int mmu_idx, uint64_t val, target_ulong addr,
                        uintptr_t retaddr, MemOp op)
  {
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
      bool locked = false;
      MemTxResult r;
 -    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
 +    section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
      mr = section->mr;
 -    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
 +    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
      if (!cpu->can_do_io) {
          cpu_io_recompile(cpu, retaddr);
      }
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
       * The memory_region_dispatch may trigger a flush/resize
       * so for plugins we save the iotlb_data just in case.
       */
 -    save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
 +    save_iotlb_data(cpu, full->xlat_section, section, mr_offset);
      if (!qemu_mutex_iothread_locked()) {
          qemu_mutex_lock_iothread();
          locked = true;
      }
 -    r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
 +    r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
      if (r != MEMTX_OK) {
          hwaddr physaddr = mr_offset +
              section->offset_within_address_space -
              section->offset_within_region;
          cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
 -                               MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
 +                               MMU_DATA_STORE, mmu_idx, full->attrs, r,
                                 retaddr);
      }
      if (locked) {
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
              copy_tlb_helper_locked(vtlb, &tmptlb);
              qemu_spin_unlock(&env_tlb(env)->c.lock);
 -            CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
 -            CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
 -            tmpio = *io; *io = *vio; *vio = tmpio;
 +            CPUTLBEntryFull *f1 = &env_tlb(env)->d[mmu_idx].fulltlb[index];
 +            CPUTLBEntryFull *f2 = &env_tlb(env)->d[mmu_idx].vfulltlb[vidx];
 +            CPUTLBEntryFull tmpf;
 +            tmpf = *f1; *f1 = *f2; *f2 = tmpf;
              return true;
          }
      }
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
                   (ADDR) & TARGET_PAGE_MASK)
  static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
 -                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
 +                           CPUTLBEntryFull *full, uintptr_t retaddr)
  {
 -    ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
 +    ram_addr_t ram_addr = mem_vaddr + full->xlat_section;
      trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
      /* Handle clean RAM pages.  */
      if (unlikely(flags & TLB_NOTDIRTY)) {
          uintptr_t index = tlb_index(env, mmu_idx, addr);
 -        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
 +        CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
 -        notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
 +        notdirty_write(env_cpu(env), addr, 1, full, retaddr);
          flags &= ~TLB_NOTDIRTY;
      }
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
      if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
          uintptr_t index = tlb_index(env, mmu_idx, addr);
 -        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
 +        CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
          /* Handle watchpoints.  */
          if (flags & TLB_WATCHPOINT) {
              int wp_access = (access_type == MMU_DATA_STORE
                               ? BP_MEM_WRITE : BP_MEM_READ);
              cpu_check_watchpoint(env_cpu(env), addr, size,
 -                                 iotlbentry->attrs, wp_access, retaddr);
 +                                 full->attrs, wp_access, retaddr);
          }
          /* Handle clean RAM pages.  */
          if (flags & TLB_NOTDIRTY) {
 -            notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
 +            notdirty_write(env_cpu(env), addr, 1, full, retaddr);
          }
      }
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
   * should have just filled the TLB. The one corner case is io_writex
   * which can cause TLB flushes and potential resizing of the TLBs
   * losing the information we need. In those cases we need to recover
 - * data from a copy of the iotlbentry. As long as this always occurs
 + * data from a copy of the CPUTLBEntryFull. As long as this always occurs
   * from the same thread (which a mem callback will be) this is safe.
   */
@@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
      if (likely(tlb_hit(tlb_addr, addr))) {
          /* We must have an iotlb entry for MMIO */
          if (tlb_addr & TLB_MMIO) {
 -            CPUIOTLBEntry *iotlbentry;
 -            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
 +            CPUTLBEntryFull *full;
 +            full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
              data->is_io = true;
 -            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
 -            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
 +            data->v.io.section =
 +                iotlb_to_section(cpu, full->xlat_section, full->attrs);
 +            data->v.io.offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
          } else {
              data->is_io = false;
              data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
      if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
          notdirty_write(env_cpu(env), addr, size,
 -                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
 +                       &env_tlb(env)->d[mmu_idx].fulltlb[index], retaddr);
      }
      return hostaddr;
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
      /* Handle anything that isn't just a straight memory access.  */
      if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
 -        CPUIOTLBEntry *iotlbentry;
 +        CPUTLBEntryFull *full;
          bool need_swap;
          /* For anything that is unaligned, recurse through full_load.  */
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
              goto do_unaligned_access;
          }
 -        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
 +        full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
          /* Handle watchpoints.  */
          if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
              /* On watchpoint hit, this will longjmp out.  */
              cpu_check_watchpoint(env_cpu(env), addr, size,
 -                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
 +                                 full->attrs, BP_MEM_READ, retaddr);
          }
          need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
          /* Handle I/O access.  */
          if (likely(tlb_addr & TLB_MMIO)) {
 -            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
 +            return io_readx(env, full, mmu_idx, addr, retaddr,
                              access_type, op ^ (need_swap * MO_BSWAP));
          }
@@ -XXX,XX +XXX,XX @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
       */
      if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
          cpu_check_watchpoint(env_cpu(env), addr, size - size2,
 -                             env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
 +                             env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
                               BP_MEM_WRITE, retaddr);
      }
      if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
          cpu_check_watchpoint(env_cpu(env), page2, size2,
 -                             env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
 +                             env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
                               BP_MEM_WRITE, retaddr);
      }
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
      /* Handle anything that isn't just a straight memory access.  */
      if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
 -        CPUIOTLBEntry *iotlbentry;
 +        CPUTLBEntryFull *full;
          bool need_swap;
          /* For anything that is unaligned, recurse through byte stores.  */
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
              goto do_unaligned_access;
          }
 -        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
 +        full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
          /* Handle watchpoints.  */
          if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
              /* On watchpoint hit, this will longjmp out.  */
              cpu_check_watchpoint(env_cpu(env), addr, size,
 -                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
 +                                 full->attrs, BP_MEM_WRITE, retaddr);
          }
          need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
          /* Handle I/O access.  */
          if (tlb_addr & TLB_MMIO) {
 -            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
 +            io_writex(env, full, mmu_idx, val, addr, retaddr,
                        op ^ (need_swap * MO_BSWAP));
              return;
          }
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
          /* Handle clean RAM pages.  */
          if (tlb_addr & TLB_NOTDIRTY) {
 -            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
 +            notdirty_write(env_cpu(env), addr, size, full, retaddr);
          }
          haddr = (void *)((uintptr_t)addr + entry->addend);
 diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
 index XXXXXXX..XXXXXXX 100644
---- a/linux-user/elfload.c
+--- a/target/arm/mte_helper.c
-+++ b/linux-user/elfload.c
++++ b/target/arm/mte_helper.c
-@@ -XXX,XX +XXX,XX @@ static bool init_guest_commpage(void)
+@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
- #define ELF_ARCH        EM_386
+     return tags + index;
  #define ELF_PLATFORM get_elf_platform()
 +#define EXSTACK_DEFAULT true
  static const char *get_elf_platform(void)
  {
@@ -XXX,XX +XXX,XX @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en
  #define ELF_ARCH        EM_ARM
  #define ELF_CLASS       ELFCLASS32
 +#define EXSTACK_DEFAULT true
  static inline void init_thread(struct target_pt_regs *regs,
                                 struct image_info *infop)
@@ -XXX,XX +XXX,XX @@ static inline void init_thread(struct target_pt_regs *regs,
  #else
+     uintptr_t index;
- #define ELF_CLASS       ELFCLASS32
+-    CPUIOTLBEntry *iotlbentry;
-+#define EXSTACK_DEFAULT true
++    CPUTLBEntryFull *full;
+     int in_page, flags;
      ram_addr_t ptr_ra;
      hwaddr ptr_paddr, tag_paddr, xlat;
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
      assert(!(flags & TLB_INVALID_MASK));
      /*
 -     * Find the iotlbentry for ptr.  This *must* be present in the TLB
 +     * Find the CPUTLBEntryFull for ptr.  This *must* be present in the TLB
       * because we just found the mapping.
       * TODO: Perhaps there should be a cputlb helper that returns a
       * matching tlb entry + iotlb entry.
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
          g_assert(tlb_hit(comparator, ptr));
      }
  # endif
 -    iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index];
 +    full = &env_tlb(env)->d[ptr_mmu_idx].fulltlb[index];
      /* If the virtual page MemAttr != Tagged, access unchecked. */
 -    if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) {
 +    if (!arm_tlb_mte_tagged(&full->attrs)) {
          return NULL;
      }
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
          int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE;
          assert(ra != 0);
          cpu_check_watchpoint(env_cpu(env), ptr, ptr_size,
 -                             iotlbentry->attrs, wp, ra);
 +                             full->attrs, wp, ra);
      }
      /*
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
      tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1);
      /* Look up the address in tag space. */
 -    tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
 +    tag_asi = full->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
      tag_as = cpu_get_address_space(env_cpu(env), tag_asi);
      mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL,
                                   tag_access == MMU_DATA_STORE,
 -                                 iotlbentry->attrs);
 +                                 full->attrs);
      /*
       * Note that @mr will never be NULL.  If there is nothing in the address
 diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/sve_helper.c
 +++ b/target/arm/sve_helper.c
@@ -XXX,XX +XXX,XX @@ bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env,
          g_assert(tlb_hit(comparator, addr));
  # endif
 -        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
 -        info->attrs = iotlbentry->attrs;
 +        CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
 +        info->attrs = full->attrs;
      }
  #endif
-@@ -XXX,XX +XXX,XX @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en
+diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
+index XXXXXXX..XXXXXXX 100644
- #define ELF_CLASS   ELFCLASS64
+--- a/target/arm/translate-a64.c
- #define ELF_ARCH    EM_LOONGARCH
++++ b/target/arm/translate-a64.c
-+#define EXSTACK_DEFAULT true
+@@ -XXX,XX +XXX,XX @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s)
+      * table entry even for that case.
- #define elf_check_arch(x) ((x) == EM_LOONGARCH)
+      */
+     return (tlb_hit(entry->addr_code, addr) &&
-@@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap(void)
+-            arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs));
- #define ELF_CLASS   ELFCLASS32
++            arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].fulltlb[index].attrs));
  #endif
- #define ELF_ARCH    EM_MIPS
+ }
 +#define EXSTACK_DEFAULT true
  #ifdef TARGET_ABI_MIPSN32
  #define elf_check_abi(x) ((x) & EF_MIPS_ABI2)
@@ -XXX,XX +XXX,XX @@ static inline void init_thread(struct target_pt_regs *regs,
  #define bswaptls(ptr) bswap32s(ptr)
  #endif
 +#ifndef EXSTACK_DEFAULT
 +#define EXSTACK_DEFAULT false
 +#endif
 +
  #include "elf.h"
  /* We must delay the following stanzas until after "elf.h". */
@@ -XXX,XX +XXX,XX @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm,
                                   struct image_info *info)
  {
      abi_ulong size, error, guard;
 +    int prot;
      size = guest_stack_size;
      if (size < STACK_LOWER_LIMIT) {
@@ -XXX,XX +XXX,XX @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm,
          guard = qemu_real_host_page_size();
      }
 -    error = target_mmap(0, size + guard, PROT_READ | PROT_WRITE,
 +    prot = PROT_READ | PROT_WRITE;
 +    if (info->exec_stack) {
 +        prot |= PROT_EXEC;
 +    }
 +    error = target_mmap(0, size + guard, prot,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (error == -1) {
          perror("mmap stack");
@@ -XXX,XX +XXX,XX @@ static void load_elf_image(const char *image_name, int image_fd,
       */
      loaddr = -1, hiaddr = 0;
      info->alignment = 0;
 +    info->exec_stack = EXSTACK_DEFAULT;
      for (i = 0; i < ehdr->e_phnum; ++i) {
          struct elf_phdr *eppnt = phdr + i;
          if (eppnt->p_type == PT_LOAD) {
@@ -XXX,XX +XXX,XX @@ static void load_elf_image(const char *image_name, int image_fd,
              if (!parse_elf_properties(image_fd, info, eppnt, bprm_buf, &err)) {
                  goto exit_errmsg;
              }
 +        } else if (eppnt->p_type == PT_GNU_STACK) {
 +            info->exec_stack = eppnt->p_flags & PF_X;
          }
      }
 --
 .34.1

-[PULL 4/4] target/avr: Disable interrupts when env->skip set
+[PULL 05/20] accel/tcg: Drop addr member from SavedIOTLB
-This bit is not saved across interrupts, so we must
+This field is only written, not read; remove it.
 delay delivering the interrupt until the skip has
 been processed.
-Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1118
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Reviewed-by: Michael Rolnik <mrolnik@gmail.com>
+Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
 Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- target/avr/helper.c    |  9 +++++++++
+ include/hw/core/cpu.h | 1 -
- target/avr/translate.c | 26 ++++++++++++++++++++++----
+ accel/tcg/cputlb.c    | 7 +++----
-files changed, 31 insertions(+), 4 deletions(-)
+files changed, 3 insertions(+), 5 deletions(-)
-diff --git a/target/avr/helper.c b/target/avr/helper.c
+diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
 index XXXXXXX..XXXXXXX 100644
---- a/target/avr/helper.c
+--- a/include/hw/core/cpu.h
-+++ b/target/avr/helper.c
++++ b/include/hw/core/cpu.h
-@@ -XXX,XX +XXX,XX @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
+@@ -XXX,XX +XXX,XX @@ struct CPUWatchpoint {
-     AVRCPU *cpu = AVR_CPU(cs);
+  * the memory regions get moved around  by io_writex.
-     CPUAVRState *env = &cpu->env;
+  */
+ typedef struct SavedIOTLB {
-+    /*
+-    hwaddr addr;
-+     * We cannot separate a skip from the next instruction,
+     MemoryRegionSection *section;
-+     * as the skip would not be preserved across the interrupt.
+     hwaddr mr_offset;
-+     * Separating the two insn normally only happens at page boundaries.
+ } SavedIOTLB;
-+     */
+diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
 +    if (env->skip) {
 +        return false;
 +    }
 +
      if (interrupt_request & CPU_INTERRUPT_RESET) {
          if (cpu_interrupts_enabled(env)) {
              cs->exception_index = EXCP_RESET;
 diff --git a/target/avr/translate.c b/target/avr/translate.c
 index XXXXXXX..XXXXXXX 100644
---- a/target/avr/translate.c
+--- a/accel/tcg/cputlb.c
-+++ b/target/avr/translate.c
++++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ static void avr_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
+@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
-     if (skip_label) {
+  * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
-         canonicalize_skip(ctx);
+  * because of the side effect of io_writex changing memory layout.
-         gen_set_label(skip_label);
+  */
--        if (ctx->base.is_jmp == DISAS_NORETURN) {
+-static void save_iotlb_data(CPUState *cs, hwaddr addr,
-+
+-                            MemoryRegionSection *section, hwaddr mr_offset)
-+        switch (ctx->base.is_jmp) {
++static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section,
-+        case DISAS_NORETURN:
++                            hwaddr mr_offset)
              ctx->base.is_jmp = DISAS_CHAIN;
 +            break;
 +        case DISAS_NEXT:
 +            if (ctx->base.tb->flags & TB_FLAGS_SKIP) {
 +                ctx->base.is_jmp = DISAS_TOO_MANY;
 +            }
 +            break;
 +        default:
 +            break;
          }
      }
@@ -XXX,XX +XXX,XX @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
  {
-     DisasContext *ctx = container_of(dcbase, DisasContext, base);
+ #ifdef CONFIG_PLUGIN
-     bool nonconst_skip = canonicalize_skip(ctx);
+     SavedIOTLB *saved = &cs->saved_iotlb;
-+    /*
+-    saved->addr = addr;
-+     * Because we disable interrupts while env->skip is set,
+     saved->section = section;
-+     * we must return to the main loop to re-evaluate afterward.
+     saved->mr_offset = mr_offset;
-+     */
+ #endif
-+    bool force_exit = ctx->base.tb->flags & TB_FLAGS_SKIP;
+@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
+      * The memory_region_dispatch may trigger a flush/resize
-     switch (ctx->base.is_jmp) {
+      * so for plugins we save the iotlb_data just in case.
-     case DISAS_NORETURN:
+      */
-@@ -XXX,XX +XXX,XX @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
+-    save_iotlb_data(cpu, full->xlat_section, section, mr_offset);
-     case DISAS_NEXT:
++    save_iotlb_data(cpu, section, mr_offset);
-     case DISAS_TOO_MANY:
-     case DISAS_CHAIN:
+     if (!qemu_mutex_iothread_locked()) {
--        if (!nonconst_skip) {
+         qemu_mutex_lock_iothread();
 +        if (!nonconst_skip && !force_exit) {
              /* Note gen_goto_tb checks singlestep.  */
              gen_goto_tb(ctx, 1, ctx->npc);
              break;
@@ -XXX,XX +XXX,XX @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
          tcg_gen_movi_tl(cpu_pc, ctx->npc);
          /* fall through */
      case DISAS_LOOKUP:
 -        tcg_gen_lookup_and_goto_ptr();
 -        break;
 +        if (!force_exit) {
 +            tcg_gen_lookup_and_goto_ptr();
 +            break;
 +        }
 +        /* fall through */
      case DISAS_EXIT:
          tcg_gen_exit_tb(NULL, 0);
          break;
 --
 .34.1

-[PULL 20/20] target/riscv: Make translator stop before the end of a page
+[PULL 06/20] accel/tcg: Suppress auto-invalidate in probe_access_internal
-Right now the translator stops right *after* the end of a page, which
+When PAGE_WRITE_INV is set when calling tlb_set_page,
-breaks reporting of fault locations when the last instruction of a
+we immediately set TLB_INVALID_MASK in order to force
-multi-insn translation block crosses a page boundary.
+tlb_fill to be called on the next lookup.  Here in
 probe_access_internal, we have just called tlb_fill
 and eliminated true misses, thus the lookup must be valid.
-Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1155
+This allows us to remove a warning comment from s390x.
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
+There doesn't seem to be a reason to change the code though.
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
-Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
 Reviewed-by: David Hildenbrand <david@redhat.com>
 Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- target/riscv/translate.c          | 17 +++++--
+ accel/tcg/cputlb.c            | 10 +++++++++-
- tests/tcg/riscv64/noexec.c        | 79 +++++++++++++++++++++++++++++++
+ target/s390x/tcg/mem_helper.c |  4 ----
- tests/tcg/riscv64/Makefile.target |  1 +
+files changed, 9 insertions(+), 5 deletions(-)
 files changed, 93 insertions(+), 4 deletions(-)
  create mode 100644 tests/tcg/riscv64/noexec.c
-diff --git a/target/riscv/translate.c b/target/riscv/translate.c
+diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
 index XXXXXXX..XXXXXXX 100644
---- a/target/riscv/translate.c
+--- a/accel/tcg/cputlb.c
-+++ b/target/riscv/translate.c
++++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
+@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
      }
-     ctx->nftemp = 0;
+     tlb_addr = tlb_read_ofs(entry, elt_ofs);
-+    /* Only the first insn within a TB is allowed to cross a page boundary. */
++    flags = TLB_FLAGS_MASK;
-     if (ctx->base.is_jmp == DISAS_NEXT) {
+     page_addr = addr & TARGET_PAGE_MASK;
--        target_ulong page_start;
+     if (!tlb_hit_page(tlb_addr, page_addr)) {
--
+         if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
--        page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
+@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
--        if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE) {
-+        if (!is_same_page(&ctx->base, ctx->base.pc_next)) {
+             /* TLB resize via tlb_fill may have moved the entry.  */
-             ctx->base.is_jmp = DISAS_TOO_MANY;
+             entry = tlb_entry(env, mmu_idx, addr);
 +        } else {
 +            unsigned page_ofs = ctx->base.pc_next & ~TARGET_PAGE_MASK;
 +
-+            if (page_ofs > TARGET_PAGE_SIZE - MAX_INSN_LEN) {
++            /*
-+                uint16_t next_insn = cpu_lduw_code(env, ctx->base.pc_next);
++             * With PAGE_WRITE_INV, we set TLB_INVALID_MASK immediately,
-+                int len = insn_len(next_insn);
++             * to force the next access through tlb_fill.  We've just
-+
++             * called tlb_fill, so we know that this entry *is* valid.
-+                if (!is_same_page(&ctx->base, ctx->base.pc_next + len)) {
++             */
-+                    ctx->base.is_jmp = DISAS_TOO_MANY;
++            flags &= ~TLB_INVALID_MASK;
 +                }
 +            }
          }
+         tlb_addr = tlb_read_ofs(entry, elt_ofs);
      }
- }
+-    flags = tlb_addr & TLB_FLAGS_MASK;
-diff --git a/tests/tcg/riscv64/noexec.c b/tests/tcg/riscv64/noexec.c
++    flags &= tlb_addr;
-new file mode 100644
-index XXXXXXX..XXXXXXX
+     /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
---- /dev/null
+     if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
-+++ b/tests/tcg/riscv64/noexec.c
+diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
@@ -XXX,XX +XXX,XX @@
 +#include "../multiarch/noexec.c.inc"
 +
 +static void *arch_mcontext_pc(const mcontext_t *ctx)
 +{
 +    return (void *)ctx->__gregs[REG_PC];
 +}
 +
 +static int arch_mcontext_arg(const mcontext_t *ctx)
 +{
 +    return ctx->__gregs[REG_A0];
 +}
 +
 +static void arch_flush(void *p, int len)
 +{
 +    __builtin___clear_cache(p, p + len);
 +}
 +
 +extern char noexec_1[];
 +extern char noexec_2[];
 +extern char noexec_end[];
 +
 +asm(".option push\n"
 +    ".option norvc\n"
 +    "noexec_1:\n"
 +    "   li a0,1\n"       /* a0 is 0 on entry, set 1. */
 +    "noexec_2:\n"
 +    "   li a0,2\n"      /* a0 is 0/1; set 2. */
 +    "   ret\n"
 +    "noexec_end:\n"
 +    ".option pop");
 +
 +int main(void)
 +{
 +    struct noexec_test noexec_tests[] = {
 +        {
 +            .name = "fallthrough",
 +            .test_code = noexec_1,
 +            .test_len = noexec_end - noexec_1,
 +            .page_ofs = noexec_1 - noexec_2,
 +            .entry_ofs = noexec_1 - noexec_2,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = 0,
 +            .expected_arg = 1,
 +        },
 +        {
 +            .name = "jump",
 +            .test_code = noexec_1,
 +            .test_len = noexec_end - noexec_1,
 +            .page_ofs = noexec_1 - noexec_2,
 +            .entry_ofs = 0,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = 0,
 +            .expected_arg = 0,
 +        },
 +        {
 +            .name = "fallthrough [cross]",
 +            .test_code = noexec_1,
 +            .test_len = noexec_end - noexec_1,
 +            .page_ofs = noexec_1 - noexec_2 - 2,
 +            .entry_ofs = noexec_1 - noexec_2 - 2,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = -2,
 +            .expected_arg = 1,
 +        },
 +        {
 +            .name = "jump [cross]",
 +            .test_code = noexec_1,
 +            .test_len = noexec_end - noexec_1,
 +            .page_ofs = noexec_1 - noexec_2 - 2,
 +            .entry_ofs = -2,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = -2,
 +            .expected_arg = 0,
 +        },
 +    };
 +
 +    return test_noexec(noexec_tests,
 +                       sizeof(noexec_tests) / sizeof(noexec_tests[0]));
 +}
 diff --git a/tests/tcg/riscv64/Makefile.target b/tests/tcg/riscv64/Makefile.target
 index XXXXXXX..XXXXXXX 100644
---- a/tests/tcg/riscv64/Makefile.target
+--- a/target/s390x/tcg/mem_helper.c
-+++ b/tests/tcg/riscv64/Makefile.target
++++ b/target/s390x/tcg/mem_helper.c
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
+ #else
- VPATH += $(SRC_PATH)/tests/tcg/riscv64
+     int flags;
- TESTS += test-div
-+TESTS += noexec
+-    /*
 -     * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
 -     * to detect if there was an exception during tlb_fill().
 -     */
      env->tlb_fill_exc = 0;
      flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
                                 ra);
 --
 .34.1

-[PULL 08/20] accel/tcg: Properly implement get_page_addr_code for user-only
+[PULL 07/20] accel/tcg: Introduce probe_access_full
-The current implementation is a no-op, simply returning addr.
+Add an interface to return the CPUTLBEntryFull struct
-This is incorrect, because we ought to be checking the page
+that goes with the lookup.  The result is not intended
-permissions for execution.
+to be valid across multiple lookups, so the user must
 use the results immediately.
-Make get_page_addr_code inline for both implementations.
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
 Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
 Acked-by: Alistair Francis <alistair.francis@wdc.com>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/exec/exec-all.h | 85 ++++++++++++++---------------------------
+ include/exec/exec-all.h | 15 +++++++++++++
- accel/tcg/cputlb.c      |  5 ---
+ include/qemu/typedefs.h |  1 +
- accel/tcg/user-exec.c   | 14 +++++++
+ accel/tcg/cputlb.c      | 47 +++++++++++++++++++++++++----------------
-files changed, 42 insertions(+), 62 deletions(-)
+files changed, 45 insertions(+), 18 deletions(-)
 diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
 index XXXXXXX..XXXXXXX 100644
 --- a/include/exec/exec-all.h
 +++ b/include/exec/exec-all.h
-@@ -XXX,XX +XXX,XX @@ struct MemoryRegionSection *iotlb_to_section(CPUState *cpu,
+@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
-                                              hwaddr index, MemTxAttrs attrs);
+                        MMUAccessType access_type, int mmu_idx,
- #endif
+                        bool nonfault, void **phost, uintptr_t retaddr);
--#if defined(CONFIG_USER_ONLY)
++#ifndef CONFIG_USER_ONLY
--void mmap_lock(void);
++/**
--void mmap_unlock(void);
++ * probe_access_full:
--bool have_mmap_lock(void);
++ * Like probe_access_flags, except also return into @pfull.
 -
  /**
 - * get_page_addr_code() - user-mode version
 + * get_page_addr_code_hostp()
   * @env: CPUArchState
   * @addr: guest virtual address of guest code
   *
 - * Returns @addr.
 + * See get_page_addr_code() (full-system version) for documentation on the
 + * return value.
 + *
-+ * Sets *@hostp (when @hostp is non-NULL) as follows.
++ * The CPUTLBEntryFull structure returned via @pfull is transient
-+ * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp
++ * and must be consumed or copied immediately, before any further
-+ * to the host address where @addr's content is kept.
++ * access or changes to TLB @mmu_idx.
 + *
 + * Note: this function can trigger an exception.
 + */
-+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
++int probe_access_full(CPUArchState *env, target_ulong addr,
-+                                        void **hostp);
++                      MMUAccessType access_type, int mmu_idx,
 +                      bool nonfault, void **phost,
 +                      CPUTLBEntryFull **pfull, uintptr_t retaddr);
 +#endif
 +
-+/**
+ #define CODE_GEN_ALIGN           16 /* must be >= of the size of a icache line */
-+ * get_page_addr_code()
-+ * @env: CPUArchState
+ /* Estimated block size for TB allocation.  */
-+ * @addr: guest virtual address of guest code
+diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
-+ *
+index XXXXXXX..XXXXXXX 100644
-+ * If we cannot translate and execute from the entire RAM page, or if
+--- a/include/qemu/typedefs.h
-+ * the region is not backed by RAM, returns -1. Otherwise, returns the
++++ b/include/qemu/typedefs.h
-+ * ram_addr_t corresponding to the guest code at @addr.
+@@ -XXX,XX +XXX,XX @@ typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
-+ *
+ typedef struct CPUAddressSpace CPUAddressSpace;
-+ * Note: this function can trigger an exception.
+ typedef struct CPUArchState CPUArchState;
-  */
+ typedef struct CPUState CPUState;
- static inline tb_page_addr_t get_page_addr_code(CPUArchState *env,
++typedef struct CPUTLBEntryFull CPUTLBEntryFull;
-                                                 target_ulong addr)
+ typedef struct DeviceListener DeviceListener;
- {
+ typedef struct DeviceState DeviceState;
--    return addr;
+ typedef struct DirtyBitmapSnapshot DirtyBitmapSnapshot;
 +    return get_page_addr_code_hostp(env, addr, NULL);
  }
 -/**
 - * get_page_addr_code_hostp() - user-mode version
 - * @env: CPUArchState
 - * @addr: guest virtual address of guest code
 - *
 - * Returns @addr.
 - *
 - * If @hostp is non-NULL, sets *@hostp to the host address where @addr's content
 - * is kept.
 - */
 -static inline tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env,
 -                                                      target_ulong addr,
 -                                                      void **hostp)
 -{
 -    if (hostp) {
 -        *hostp = g2h_untagged(addr);
 -    }
 -    return addr;
 -}
 +#if defined(CONFIG_USER_ONLY)
 +void mmap_lock(void);
 +void mmap_unlock(void);
 +bool have_mmap_lock(void);
  /**
   * adjust_signal_pc:
@@ -XXX,XX +XXX,XX @@ G_NORETURN void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr,
  static inline void mmap_lock(void) {}
  static inline void mmap_unlock(void) {}
 -/**
 - * get_page_addr_code() - full-system version
 - * @env: CPUArchState
 - * @addr: guest virtual address of guest code
 - *
 - * If we cannot translate and execute from the entire RAM page, or if
 - * the region is not backed by RAM, returns -1. Otherwise, returns the
 - * ram_addr_t corresponding to the guest code at @addr.
 - *
 - * Note: this function can trigger an exception.
 - */
 -tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr);
 -
 -/**
 - * get_page_addr_code_hostp() - full-system version
 - * @env: CPUArchState
 - * @addr: guest virtual address of guest code
 - *
 - * See get_page_addr_code() (full-system version) for documentation on the
 - * return value.
 - *
 - * Sets *@hostp (when @hostp is non-NULL) as follows.
 - * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp
 - * to the host address where @addr's content is kept.
 - *
 - * Note: this function can trigger an exception.
 - */
 -tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
 -                                        void **hostp);
 -
  void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
  void tlb_set_dirty(CPUState *cpu, target_ulong vaddr);
 diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/cputlb.c
 +++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
+@@ -XXX,XX +XXX,XX @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
-     return qemu_ram_addr_from_host_nofail(p);
+ static int probe_access_internal(CPUArchState *env, target_ulong addr,
                                   int fault_size, MMUAccessType access_type,
                                   int mmu_idx, bool nonfault,
 -                                 void **phost, uintptr_t retaddr)
 +                                 void **phost, CPUTLBEntryFull **pfull,
 +                                 uintptr_t retaddr)
  {
      uintptr_t index = tlb_index(env, mmu_idx, addr);
      CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
                                             mmu_idx, nonfault, retaddr)) {
                  /* Non-faulting page table read failed.  */
                  *phost = NULL;
 +                *pfull = NULL;
                  return TLB_INVALID_MASK;
              }
              /* TLB resize via tlb_fill may have moved the entry.  */
 +            index = tlb_index(env, mmu_idx, addr);
              entry = tlb_entry(env, mmu_idx, addr);
              /*
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
      }
      flags &= tlb_addr;
 +    *pfull = &env_tlb(env)->d[mmu_idx].fulltlb[index];
 +
      /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
      if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
          *phost = NULL;
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
      return flags;
  }
--tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
+-int probe_access_flags(CPUArchState *env, target_ulong addr,
--{
+-                       MMUAccessType access_type, int mmu_idx,
--    return get_page_addr_code_hostp(env, addr, NULL);
+-                       bool nonfault, void **phost, uintptr_t retaddr)
--}
++int probe_access_full(CPUArchState *env, target_ulong addr,
 +                      MMUAccessType access_type, int mmu_idx,
 +                      bool nonfault, void **phost, CPUTLBEntryFull **pfull,
 +                      uintptr_t retaddr)
  {
 -    int flags;
 -
- static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
+-    flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
-                            CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
+-                                  nonfault, phost, retaddr);
- {
++    int flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
-diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
++                                      nonfault, phost, pfull, retaddr);
-index XXXXXXX..XXXXXXX 100644
---- a/accel/tcg/user-exec.c
+     /* Handle clean RAM pages.  */
-+++ b/accel/tcg/user-exec.c
+     if (unlikely(flags & TLB_NOTDIRTY)) {
-@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
+-        uintptr_t index = tlb_index(env, mmu_idx, addr);
-     return size ? g2h(env_cpu(env), addr) : NULL;
+-        CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
 -
 -        notdirty_write(env_cpu(env), addr, 1, full, retaddr);
 +        notdirty_write(env_cpu(env), addr, 1, *pfull, retaddr);
          flags &= ~TLB_NOTDIRTY;
      }
      return flags;
  }
-+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
++int probe_access_flags(CPUArchState *env, target_ulong addr,
-+                                        void **hostp)
++                       MMUAccessType access_type, int mmu_idx,
 +                       bool nonfault, void **phost, uintptr_t retaddr)
 +{
-+    int flags;
++    CPUTLBEntryFull *full;
 +
-+    flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
++    return probe_access_full(env, addr, access_type, mmu_idx,
-+    g_assert(flags == 0);
++                             nonfault, phost, &full, retaddr);
 +
 +    if (hostp) {
 +        *hostp = g2h_untagged(addr);
 +    }
 +    return addr;
 +}
 +
- /* The softmmu versions of these helpers are in cputlb.c.  */
+ void *probe_access(CPUArchState *env, target_ulong addr, int size,
+                    MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
- /*
+ {
 +    CPUTLBEntryFull *full;
      void *host;
      int flags;
      g_assert(-(addr | TARGET_PAGE_MASK) >= size);
      flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
 -                                  false, &host, retaddr);
 +                                  false, &host, &full, retaddr);
      /* Per the interface, size == 0 merely faults the access. */
      if (size == 0) {
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
      }
      if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
 -        uintptr_t index = tlb_index(env, mmu_idx, addr);
 -        CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
 -
          /* Handle watchpoints.  */
          if (flags & TLB_WATCHPOINT) {
              int wp_access = (access_type == MMU_DATA_STORE
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
  void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
                          MMUAccessType access_type, int mmu_idx)
  {
 +    CPUTLBEntryFull *full;
      void *host;
      int flags;
      flags = probe_access_internal(env, addr, 0, access_type,
 -                                  mmu_idx, true, &host, 0);
 +                                  mmu_idx, true, &host, &full, 0);
      /* No combination of flags are expected by the caller. */
      return flags ? NULL : host;
@@ -XXX,XX +XXX,XX @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
  tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
                                          void **hostp)
  {
 +    CPUTLBEntryFull *full;
      void *p;
      (void)probe_access_internal(env, addr, 1, MMU_INST_FETCH,
 -                                cpu_mmu_index(env, true), false, &p, 0);
 +                                cpu_mmu_index(env, true), false, &p, &full, 0);
      if (p == NULL) {
          return -1;
      }
 --
 .34.1

-[PULL 1/4] target/avr: Support probe argument to tlb_fill
+[PULL 08/20] accel/tcg: Introduce tlb_set_page_full
-While there are no target-specific nonfaulting probes,
+Now that we have collected all of the page data into
-generic code may grow some uses at some point.
+CPUTLBEntryFull, provide an interface to record that
 all in one go, instead of using 4 arguments.  This interface
 allows CPUTLBEntryFull to be extended without having to
 change the number of arguments.
-Note that the attrs argument was incorrect -- it should have
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-been MEMTXATTRS_UNSPECIFIED. Just use the simpler interface.
+Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
 Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- target/avr/helper.c | 46 ++++++++++++++++++++++++++++-----------------
+ include/exec/cpu-defs.h | 14 +++++++++++
- 1 file changed, 29 insertions(+), 17 deletions(-)
+ include/exec/exec-all.h | 22 ++++++++++++++++++
  accel/tcg/cputlb.c      | 51 ++++++++++++++++++++++++++---------------
  3 files changed, 69 insertions(+), 18 deletions(-)
-diff --git a/target/avr/helper.c b/target/avr/helper.c
+diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
 index XXXXXXX..XXXXXXX 100644
---- a/target/avr/helper.c
+--- a/include/exec/cpu-defs.h
-+++ b/target/avr/helper.c
++++ b/include/exec/cpu-defs.h
-@@ -XXX,XX +XXX,XX @@ bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntryFull {
-                       MMUAccessType access_type, int mmu_idx,
+      *     + the offset within the target MemoryRegion (otherwise)
-                       bool probe, uintptr_t retaddr)
+      */
      hwaddr xlat_section;
 +
 +    /*
 +     * @phys_addr contains the physical address in the address space
 +     * given by cpu_asidx_from_attrs(cpu, @attrs).
 +     */
 +    hwaddr phys_addr;
 +
 +    /* @attrs contains the memory transaction attributes for the page. */
      MemTxAttrs attrs;
 +
 +    /* @prot contains the complete protections for the page. */
 +    uint8_t prot;
 +
 +    /* @lg_page_size contains the log2 of the page size. */
 +    uint8_t lg_page_size;
  } CPUTLBEntryFull;
  /*
 diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
 index XXXXXXX..XXXXXXX 100644
 --- a/include/exec/exec-all.h
 +++ b/include/exec/exec-all.h
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
                                                 uint16_t idxmap,
                                                 unsigned bits);
 +/**
 + * tlb_set_page_full:
 + * @cpu: CPU context
 + * @mmu_idx: mmu index of the tlb to modify
 + * @vaddr: virtual address of the entry to add
 + * @full: the details of the tlb entry
 + *
 + * Add an entry to @cpu tlb index @mmu_idx.  All of the fields of
 + * @full must be filled, except for xlat_section, and constitute
 + * the complete description of the translated page.
 + *
 + * This is generally called by the target tlb_fill function after
 + * having performed a successful page table walk to find the physical
 + * address and attributes for the translation.
 + *
 + * At most one entry for a given virtual address is permitted. Only a
 + * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only
 + * used by tlb_flush_page.
 + */
 +void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr,
 +                       CPUTLBEntryFull *full);
 +
  /**
   * tlb_set_page_with_attrs:
   * @cpu: CPU to add this TLB entry for
 diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/cputlb.c
 +++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
      env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
  }
 -/* Add a new TLB entry. At most one entry for a given virtual address
 +/*
 + * Add a new TLB entry. At most one entry for a given virtual address
   * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
   * supplied size is only used by tlb_flush_page.
   *
   * Called from TCG-generated code, which is under an RCU read-side
   * critical section.
   */
 -void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
 -                             hwaddr paddr, MemTxAttrs attrs, int prot,
 -                             int mmu_idx, target_ulong size)
 +void tlb_set_page_full(CPUState *cpu, int mmu_idx,
 +                       target_ulong vaddr, CPUTLBEntryFull *full)
  {
--    int prot = 0;
+     CPUArchState *env = cpu->env_ptr;
--    MemTxAttrs attrs = {};
+     CPUTLB *tlb = env_tlb(env);
-+    int prot, page_size = TARGET_PAGE_SIZE;
+@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
-     uint32_t paddr;
+     CPUTLBEntry *te, tn;
+     hwaddr iotlb, xlat, sz, paddr_page;
-     address &= TARGET_PAGE_MASK;
+     target_ulong vaddr_page;
+-    int asidx = cpu_asidx_from_attrs(cpu, attrs);
-     if (mmu_idx == MMU_CODE_IDX) {
+-    int wp_flags;
--        /* access to code in flash */
++    int asidx, wp_flags, prot;
-+        /* Access to code in flash. */
+     bool is_ram, is_romd;
-         paddr = OFFSET_CODE + address;
-         prot = PAGE_READ | PAGE_EXEC;
+     assert_cpu_is_self(cpu);
--        if (paddr + TARGET_PAGE_SIZE > OFFSET_DATA) {
-+        if (paddr >= OFFSET_DATA) {
+-    if (size <= TARGET_PAGE_SIZE) {
-+            /*
++    if (full->lg_page_size <= TARGET_PAGE_BITS) {
-+             * This should not be possible via any architectural operations.
+         sz = TARGET_PAGE_SIZE;
 +             * There is certainly not an exception that we can deliver.
 +             * Accept probing that might come from generic code.
 +             */
 +            if (probe) {
 +                return false;
 +            }
              error_report("execution left flash memory");
              abort();
          }
 -    } else if (address < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) {
 -        /*
 -         * access to CPU registers, exit and rebuilt this TB to use full access
 -         * incase it touches specially handled registers like SREG or SP
 -         */
 -        AVRCPU *cpu = AVR_CPU(cs);
 -        CPUAVRState *env = &cpu->env;
 -        env->fullacc = 1;
 -        cpu_loop_exit_restore(cs, retaddr);
      } else {
--        /* access to memory. nothing special */
+-        tlb_add_large_page(env, mmu_idx, vaddr, size);
-+        /* Access to memory. */
+-        sz = size;
-         paddr = OFFSET_DATA + address;
++        sz = (hwaddr)1 << full->lg_page_size;
-         prot = PAGE_READ | PAGE_WRITE;
++        tlb_add_large_page(env, mmu_idx, vaddr, sz);
 +        if (address < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) {
 +            /*
 +             * Access to CPU registers, exit and rebuilt this TB to use
 +             * full access in case it touches specially handled registers
 +             * like SREG or SP.  For probing, set page_size = 1, in order
 +             * to force tlb_fill to be called for the next access.
 +             */
 +            if (probe) {
 +                page_size = 1;
 +            } else {
 +                AVRCPU *cpu = AVR_CPU(cs);
 +                CPUAVRState *env = &cpu->env;
 +                env->fullacc = 1;
 +                cpu_loop_exit_restore(cs, retaddr);
 +            }
 +        }
      }
+     vaddr_page = vaddr & TARGET_PAGE_MASK;
--    tlb_set_page_with_attrs(cs, address, paddr, attrs, prot,
+-    paddr_page = paddr & TARGET_PAGE_MASK;
--                            mmu_idx, TARGET_PAGE_SIZE);
++    paddr_page = full->phys_addr & TARGET_PAGE_MASK;
--
-+    tlb_set_page(cs, address, paddr, prot, mmu_idx, page_size);
++    prot = full->prot;
-     return true;
++    asidx = cpu_asidx_from_attrs(cpu, full->attrs);
      section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
 -                                                &xlat, &sz, attrs, &prot);
 +                                                &xlat, &sz, full->attrs, &prot);
      assert(sz >= TARGET_PAGE_SIZE);
      tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
                " prot=%x idx=%d\n",
 -              vaddr, paddr, prot, mmu_idx);
 +              vaddr, full->phys_addr, prot, mmu_idx);
      address = vaddr_page;
 -    if (size < TARGET_PAGE_SIZE) {
 +    if (full->lg_page_size < TARGET_PAGE_BITS) {
          /* Repeat the MMU check and TLB fill on every access.  */
          address |= TLB_INVALID_MASK;
      }
 -    if (attrs.byte_swap) {
 +    if (full->attrs.byte_swap) {
          address |= TLB_BSWAP;
      }
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
       * subtract here is that of the page base, and not the same as the
       * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
       */
 +    desc->fulltlb[index] = *full;
      desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
 -    desc->fulltlb[index].attrs = attrs;
 +    desc->fulltlb[index].phys_addr = paddr_page;
 +    desc->fulltlb[index].prot = prot;
      /* Now calculate the new entry */
      tn.addend = addend - vaddr_page;
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
      qemu_spin_unlock(&tlb->c.lock);
  }
+-/* Add a new TLB entry, but without specifying the memory
+- * transaction attributes to be used.
+- */
++void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
++                             hwaddr paddr, MemTxAttrs attrs, int prot,
++                             int mmu_idx, target_ulong size)
++{
++    CPUTLBEntryFull full = {
++        .phys_addr = paddr,
++        .attrs = attrs,
++        .prot = prot,
++        .lg_page_size = ctz64(size)
++    };
++
++    assert(is_power_of_2(size));
++    tlb_set_page_full(cpu, mmu_idx, vaddr, &full);
++}
++
+ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
+                   hwaddr paddr, int prot,
+                   int mmu_idx, target_ulong size)
 --
 .34.1

-[PULL 3/4] target/avr: Only execute one interrupt at a time
+[PULL 09/20] include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA
-We cannot deliver two interrupts simultaneously;
+Allow the target to cache items from the guest page tables.
 the first interrupt handler must execute first.
-Reviewed-by: Michael Rolnik <mrolnik@gmail.com>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
 Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
 Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- target/avr/helper.c | 9 +++------
+ include/exec/cpu-defs.h | 9 +++++++++
- 1 file changed, 3 insertions(+), 6 deletions(-)
+ 1 file changed, 9 insertions(+)
-diff --git a/target/avr/helper.c b/target/avr/helper.c
+diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
 index XXXXXXX..XXXXXXX 100644
---- a/target/avr/helper.c
+--- a/include/exec/cpu-defs.h
-+++ b/target/avr/helper.c
++++ b/include/exec/cpu-defs.h
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntryFull {
- bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
+     /* @lg_page_size contains the log2 of the page size. */
- {
+     uint8_t lg_page_size;
--    bool ret = false;
++
-     AVRCPU *cpu = AVR_CPU(cs);
++    /*
-     CPUAVRState *env = &cpu->env;
++     * Allow target-specific additions to this structure.
++     * This may be used to cache items from the guest cpu
-@@ -XXX,XX +XXX,XX @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
++     * page tables for later use by the implementation.
-             avr_cpu_do_interrupt(cs);
++     */
++#ifdef TARGET_PAGE_ENTRY_EXTRA
-             cs->interrupt_request &= ~CPU_INTERRUPT_RESET;
++    TARGET_PAGE_ENTRY_EXTRA
--
++#endif
--            ret = true;
+ } CPUTLBEntryFull;
-+            return true;
-         }
+ /*
      }
      if (interrupt_request & CPU_INTERRUPT_HARD) {
@@ -XXX,XX +XXX,XX @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
              if (!env->intsrc) {
                  cs->interrupt_request &= ~CPU_INTERRUPT_HARD;
              }
 -
 -            ret = true;
 +            return true;
          }
      }
 -    return ret;
 +    return false;
  }
  void avr_cpu_do_interrupt(CPUState *cs)
 --
 .34.1

-[PULL 16/20] accel/tcg: Add fast path for translator_ld*
+[PULL 10/20] accel/tcg: Remove PageDesc code_bitmap
-Cache the translation from guest to host address, so we may
+This bitmap is created and discarded immediately.
-use direct loads when we hit on the primary translation page.
+We gain nothing by its existence.
-Look up the second translation page only once, during translation.
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-This obviates another lookup of the second page within tb_gen_code
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-after translation.
+Message-Id: <20220822232338.1727934-2-richard.henderson@linaro.org>
 ---
  accel/tcg/translate-all.c | 78 ++-------------------------------------
  1 file changed, 4 insertions(+), 74 deletions(-)
-Fixes a bug in that plugin_insn_append should be passed the bytes
-in the original memory order, not bswapped by pieces.
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
-Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- include/exec/translator.h |  63 +++++++++++--------
- accel/tcg/translate-all.c |  23 +++----
- accel/tcg/translator.c    | 126 +++++++++++++++++++++++++++++---------
- 3 files changed, 141 insertions(+), 71 deletions(-)
-diff --git a/include/exec/translator.h b/include/exec/translator.h
-index XXXXXXX..XXXXXXX 100644
---- a/include/exec/translator.h
-+++ b/include/exec/translator.h
-@@ -XXX,XX +XXX,XX @@ typedef enum DisasJumpType {
-  * Architecture-agnostic disassembly context.
-  */
- typedef struct DisasContextBase {
--    const TranslationBlock *tb;
-+    TranslationBlock *tb;
-     target_ulong pc_first;
-     target_ulong pc_next;
-     DisasJumpType is_jmp;
-     int num_insns;
-     int max_insns;
-     bool singlestep_enabled;
--#ifdef CONFIG_USER_ONLY
--    /*
--     * Guest address of the last byte of the last protected page.
--     *
--     * Pages containing the translated instructions are made non-writable in
--     * order to achieve consistency in case another thread is modifying the
--     * code while translate_insn() fetches the instruction bytes piecemeal.
--     * Such writer threads are blocked on mmap_lock() in page_unprotect().
--     */
--    target_ulong page_protect_end;
--#endif
-+    void *host_addr[2];
- } DisasContextBase;
- /**
-@@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest);
-  * the relevant information at translation time.
-  */
--#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn)             \
--    type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \
--                           abi_ptr pc, bool do_swap);                   \
--    static inline type fullname(CPUArchState *env,                      \
--                                DisasContextBase *dcbase, abi_ptr pc)   \
--    {                                                                   \
--        return fullname ## _swap(env, dcbase, pc, false);               \
-+uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
-+uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
-+uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
-+uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
-+
-+static inline uint16_t
-+translator_lduw_swap(CPUArchState *env, DisasContextBase *db,
-+                     abi_ptr pc, bool do_swap)
-+{
-+    uint16_t ret = translator_lduw(env, db, pc);
-+    if (do_swap) {
-+        ret = bswap16(ret);
-     }
-+    return ret;
-+}
--#define FOR_EACH_TRANSLATOR_LD(F)                                       \
--    F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */)           \
--    F(translator_lduw, uint16_t, cpu_lduw_code, bswap16)                \
--    F(translator_ldl, uint32_t, cpu_ldl_code, bswap32)                  \
--    F(translator_ldq, uint64_t, cpu_ldq_code, bswap64)
-+static inline uint32_t
-+translator_ldl_swap(CPUArchState *env, DisasContextBase *db,
-+                    abi_ptr pc, bool do_swap)
-+{
-+    uint32_t ret = translator_ldl(env, db, pc);
-+    if (do_swap) {
-+        ret = bswap32(ret);
-+    }
-+    return ret;
-+}
--FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
--
--#undef GEN_TRANSLATOR_LD
-+static inline uint64_t
-+translator_ldq_swap(CPUArchState *env, DisasContextBase *db,
-+                    abi_ptr pc, bool do_swap)
-+{
-+    uint64_t ret = translator_ldq_swap(env, db, pc, false);
-+    if (do_swap) {
-+        ret = bswap64(ret);
-+    }
-+    return ret;
-+}
- /*
-  * Return whether addr is on the same page as where disassembly started.
 diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/translate-all.c
 +++ b/accel/tcg/translate-all.c
-@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
+@@ -XXX,XX +XXX,XX @@
- {
+ #define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
-     CPUArchState *env = cpu->env_ptr;
+ #endif
-     TranslationBlock *tb, *existing_tb;
--    tb_page_addr_t phys_pc, phys_page2;
+-#define SMC_BITMAP_USE_THRESHOLD 10
--    target_ulong virt_page2;
+-
-+    tb_page_addr_t phys_pc;
+ typedef struct PageDesc {
-     tcg_insn_unit *gen_code_buf;
+     /* list of TBs intersecting this ram page */
-     int gen_code_size, search_size, max_insns;
+     uintptr_t first_tb;
- #ifdef CONFIG_PROFILER
+-#ifdef CONFIG_SOFTMMU
-@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
+-    /* in order to optimize self modifying code, we count the number
-     tb->flags = flags;
+-       of lookups we do to a given page to use a bitmap */
-     tb->cflags = cflags;
+-    unsigned long *code_bitmap;
-     tb->trace_vcpu_dstate = *cpu->trace_dstate;
+-    unsigned int code_write_count;
-+    tb->page_addr[0] = phys_pc;
+-#else
-+    tb->page_addr[1] = -1;
++#ifdef CONFIG_USER_ONLY
-     tcg_ctx->tb_cflags = cflags;
+     unsigned long flags;
-  tb_overflow:
+     void *target_data;
+ #endif
-@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
+-#ifndef CONFIG_USER_ONLY
-     }
++#ifdef CONFIG_SOFTMMU
+     QemuSpin lock;
-     /*
+ #endif
--     * If the TB is not associated with a physical RAM page then
+ } PageDesc;
--     * it must be a temporary one-insn TB, and we have nothing to do
+@@ -XXX,XX +XXX,XX @@ void tb_htable_init(void)
--     * except fill in the page_addr[] fields. Return early before
+     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
 -     * attempting to link to other TBs or add to the lookup table.
 +     * If the TB is not associated with a physical RAM page then it must be
 +     * a temporary one-insn TB, and we have nothing left to do. Return early
 +     * before attempting to link to other TBs or add to the lookup table.
       */
 -    if (phys_pc == -1) {
 -        tb->page_addr[0] = tb->page_addr[1] = -1;
 +    if (tb->page_addr[0] == -1) {
          return tb;
      }
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
       */
      tcg_tb_insert(tb);
 -    /* check next page if needed */
 -    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
 -    phys_page2 = -1;
 -    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
 -        phys_page2 = get_page_addr_code(env, virt_page2);
 -    }
      /*
       * No explicit memory barrier is required -- tb_link_page() makes the
       * TB visible in a consistent state.
       */
 -    existing_tb = tb_link_page(tb, phys_pc, phys_page2);
 +    existing_tb = tb_link_page(tb, tb->page_addr[0], tb->page_addr[1]);
      /* if the TB already exists, discard what we just translated */
      if (unlikely(existing_tb != tb)) {
          uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
 diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/translator.c
 +++ b/accel/tcg/translator.c
@@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
      return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
  }
--static inline void translator_page_protect(DisasContextBase *dcbase,
+-/* call with @p->lock held */
--                                           target_ulong pc)
+-static inline void invalidate_page_bitmap(PageDesc *p)
 -{
--#ifdef CONFIG_USER_ONLY
+-    assert_page_locked(p);
--    dcbase->page_protect_end = pc | ~TARGET_PAGE_MASK;
+-#ifdef CONFIG_SOFTMMU
--    page_protect(pc);
+-    g_free(p->code_bitmap);
 -    p->code_bitmap = NULL;
 -    p->code_write_count = 0;
 -#endif
 -}
 -
- void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
+ /* Set to NULL all the 'first_tb' fields in all PageDescs. */
-                      target_ulong pc, void *host_pc,
+ static void page_flush_tb_1(int level, void **lp)
-                      const TranslatorOps *ops, DisasContextBase *db)
+ {
-@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
+@@ -XXX,XX +XXX,XX @@ static void page_flush_tb_1(int level, void **lp)
-     db->num_insns = 0;
+         for (i = 0; i < V_L2_SIZE; ++i) {
-     db->max_insns = max_insns;
+             page_lock(&pd[i]);
-     db->singlestep_enabled = cflags & CF_SINGLE_STEP;
+             pd[i].first_tb = (uintptr_t)NULL;
--    translator_page_protect(db, db->pc_next);
+-            invalidate_page_bitmap(pd + i);
-+    db->host_addr[0] = host_pc;
+             page_unlock(&pd[i]);
-+    db->host_addr[1] = NULL;
+         }
-+
+     } else {
-+#ifdef CONFIG_USER_ONLY
+@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
-+    page_protect(pc);
+     if (rm_from_page_list) {
-+#endif
+         p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
+         tb_page_remove(p, tb);
-     ops->init_disas_context(db, cpu);
+-        invalidate_page_bitmap(p);
-     tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
+         if (tb->page_addr[1] != -1) {
-@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
+             p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
              tb_page_remove(p, tb);
 -            invalidate_page_bitmap(p);
          }
      }
@@ -XXX,XX +XXX,XX @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
      }
  }
 -#ifdef CONFIG_SOFTMMU
 -/* call with @p->lock held */
 -static void build_page_bitmap(PageDesc *p)
 -{
 -    int n, tb_start, tb_end;
 -    TranslationBlock *tb;
 -
 -    assert_page_locked(p);
 -    p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
 -
 -    PAGE_FOR_EACH_TB(p, tb, n) {
 -        /* NOTE: this is subtle as a TB may span two physical pages */
 -        if (n == 0) {
 -            /* NOTE: tb_end may be after the end of the page, but
 -               it is not a problem */
 -            tb_start = tb->pc & ~TARGET_PAGE_MASK;
 -            tb_end = tb_start + tb->size;
 -            if (tb_end > TARGET_PAGE_SIZE) {
 -                tb_end = TARGET_PAGE_SIZE;
 -             }
 -        } else {
 -            tb_start = 0;
 -            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
 -        }
 -        bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
 -    }
 -}
 -#endif
 -
  /* add the tb in the target page and protect it if necessary
   *
   * Called with mmap_lock held for user-mode emulation.
@@ -XXX,XX +XXX,XX @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
      page_already_protected = p->first_tb != (uintptr_t)NULL;
  #endif
+     p->first_tb = (uintptr_t)tb | n;
+-    invalidate_page_bitmap(p);
+ #if defined(CONFIG_USER_ONLY)
+     /* translator_loop() must have made all TB pages non-writable */
+@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
+     /* remove TB from the page(s) if we couldn't insert it */
+     if (unlikely(existing_tb)) {
+         tb_page_remove(p, tb);
+-        invalidate_page_bitmap(p);
+         if (p2) {
+             tb_page_remove(p2, tb);
+-            invalidate_page_bitmap(p2);
+         }
+         tb = existing_tb;
+     }
+@@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
+ #if !defined(CONFIG_USER_ONLY)
+     /* if no code remaining, no need to continue to use slow writes */
+     if (!p->first_tb) {
+-        invalidate_page_bitmap(p);
+         tlb_unprotect_code(start);
+     }
+ #endif
+@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_page_fast(struct page_collection *pages,
+     }
+     assert_page_locked(p);
+-    if (!p->code_bitmap &&
+-        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
+-        build_page_bitmap(p);
+-    }
+-    if (p->code_bitmap) {
+-        unsigned int nr;
+-        unsigned long b;
+-
+-        nr = start & ~TARGET_PAGE_MASK;
+-        b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
+-        if (b & ((1 << len) - 1)) {
+-            goto do_invalidate;
+-        }
+-    } else {
+-    do_invalidate:
+-        tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
+-                                              retaddr);
+-    }
++    tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
++                                          retaddr);
  }
+ #else
--static inline void translator_maybe_page_protect(DisasContextBase *dcbase,
+ /* Called with mmap_lock held. If pc is not 0 then it indicates the
 -                                                 target_ulong pc, size_t len)
 +static void *translator_access(CPUArchState *env, DisasContextBase *db,
 +                               target_ulong pc, size_t len)
  {
 -#ifdef CONFIG_USER_ONLY
 -    target_ulong end = pc + len - 1;
 +    void *host;
 +    target_ulong base, end;
 +    TranslationBlock *tb;
 -    if (end > dcbase->page_protect_end) {
 -        translator_page_protect(dcbase, end);
 +    tb = db->tb;
 +
 +    /* Use slow path if first page is MMIO. */
 +    if (unlikely(tb->page_addr[0] == -1)) {
 +        return NULL;
      }
 +
 +    end = pc + len - 1;
 +    if (likely(is_same_page(db, end))) {
 +        host = db->host_addr[0];
 +        base = db->pc_first;
 +    } else {
 +        host = db->host_addr[1];
 +        base = TARGET_PAGE_ALIGN(db->pc_first);
 +        if (host == NULL) {
 +            tb->page_addr[1] =
 +                get_page_addr_code_hostp(env, base, &db->host_addr[1]);
 +#ifdef CONFIG_USER_ONLY
 +            page_protect(end);
  #endif
 +            /* We cannot handle MMIO as second page. */
 +            assert(tb->page_addr[1] != -1);
 +            host = db->host_addr[1];
 +        }
 +
 +        /* Use slow path when crossing pages. */
 +        if (is_same_page(db, pc)) {
 +            return NULL;
 +        }
 +    }
 +
 +    tcg_debug_assert(pc >= base);
 +    return host + (pc - base);
  }
 -#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn)             \
 -    type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \
 -                           abi_ptr pc, bool do_swap)                    \
 -    {                                                                   \
 -        translator_maybe_page_protect(dcbase, pc, sizeof(type));        \
 -        type ret = load_fn(env, pc);                                    \
 -        if (do_swap) {                                                  \
 -            ret = swap_fn(ret);                                         \
 -        }                                                               \
 -        plugin_insn_append(pc, &ret, sizeof(ret));                      \
 -        return ret;                                                     \
 +uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
 +{
 +    uint8_t ret;
 +    void *p = translator_access(env, db, pc, sizeof(ret));
 +
 +    if (p) {
 +        plugin_insn_append(pc, p, sizeof(ret));
 +        return ldub_p(p);
      }
 +    ret = cpu_ldub_code(env, pc);
 +    plugin_insn_append(pc, &ret, sizeof(ret));
 +    return ret;
 +}
 -FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
 +uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
 +{
 +    uint16_t ret, plug;
 +    void *p = translator_access(env, db, pc, sizeof(ret));
 -#undef GEN_TRANSLATOR_LD
 +    if (p) {
 +        plugin_insn_append(pc, p, sizeof(ret));
 +        return lduw_p(p);
 +    }
 +    ret = cpu_lduw_code(env, pc);
 +    plug = tswap16(ret);
 +    plugin_insn_append(pc, &plug, sizeof(ret));
 +    return ret;
 +}
 +
 +uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
 +{
 +    uint32_t ret, plug;
 +    void *p = translator_access(env, db, pc, sizeof(ret));
 +
 +    if (p) {
 +        plugin_insn_append(pc, p, sizeof(ret));
 +        return ldl_p(p);
 +    }
 +    ret = cpu_ldl_code(env, pc);
 +    plug = tswap32(ret);
 +    plugin_insn_append(pc, &plug, sizeof(ret));
 +    return ret;
 +}
 +
 +uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
 +{
 +    uint64_t ret, plug;
 +    void *p = translator_access(env, db, pc, sizeof(ret));
 +
 +    if (p) {
 +        plugin_insn_append(pc, p, sizeof(ret));
 +        return ldq_p(p);
 +    }
 +    ret = cpu_ldq_code(env, pc);
 +    plug = tswap64(ret);
 +    plugin_insn_append(pc, &plug, sizeof(ret));
 +    return ret;
 +}
 --
 .34.1

-[PULL 15/20] accel/tcg: Add pc and host_pc params to gen_intermediate_code
+[PULL 11/20] accel/tcg: Use bool for page_find_alloc
-Pass these along to translator_loop -- pc may be used instead
+Bool is more appropriate type for the alloc parameter.
 of tb->pc, and host_pc is currently unused.  Adjust all targets
 at one time.
-Acked-by: Alistair Francis <alistair.francis@wdc.com>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
 Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/exec/exec-all.h       |  1 -
+ accel/tcg/translate-all.c | 14 +++++++-------
- include/exec/translator.h     | 24 ++++++++++++++++++++----
+file changed, 7 insertions(+), 7 deletions(-)
  accel/tcg/translate-all.c     |  6 ++++--
  accel/tcg/translator.c        |  9 +++++----
  target/alpha/translate.c      |  5 +++--
  target/arm/translate.c        |  5 +++--
  target/avr/translate.c        |  5 +++--
  target/cris/translate.c       |  5 +++--
  target/hexagon/translate.c    |  6 ++++--
  target/hppa/translate.c       |  5 +++--
  target/i386/tcg/translate.c   |  5 +++--
  target/loongarch/translate.c  |  6 ++++--
  target/m68k/translate.c       |  5 +++--
  target/microblaze/translate.c |  5 +++--
  target/mips/tcg/translate.c   |  5 +++--
  target/nios2/translate.c      |  5 +++--
  target/openrisc/translate.c   |  6 ++++--
  target/ppc/translate.c        |  5 +++--
  target/riscv/translate.c      |  5 +++--
  target/rx/translate.c         |  5 +++--
  target/s390x/tcg/translate.c  |  5 +++--
  target/sh4/translate.c        |  5 +++--
  target/sparc/translate.c      |  5 +++--
  target/tricore/translate.c    |  6 ++++--
  target/xtensa/translate.c     |  6 ++++--
 files changed, 97 insertions(+), 53 deletions(-)
-diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
-index XXXXXXX..XXXXXXX 100644
---- a/include/exec/exec-all.h
-+++ b/include/exec/exec-all.h
-@@ -XXX,XX +XXX,XX @@ typedef ram_addr_t tb_page_addr_t;
- #define TB_PAGE_ADDR_FMT RAM_ADDR_FMT
- #endif
--void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns);
- void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb,
-                           target_ulong *data);
-diff --git a/include/exec/translator.h b/include/exec/translator.h
-index XXXXXXX..XXXXXXX 100644
---- a/include/exec/translator.h
-+++ b/include/exec/translator.h
-@@ -XXX,XX +XXX,XX @@
- #include "exec/translate-all.h"
- #include "tcg/tcg.h"
-+/**
-+ * gen_intermediate_code
-+ * @cpu: cpu context
-+ * @tb: translation block
-+ * @max_insns: max number of instructions to translate
-+ * @pc: guest virtual program counter address
-+ * @host_pc: host physical program counter address
-+ *
-+ * This function must be provided by the target, which should create
-+ * the target-specific DisasContext, and then invoke translator_loop.
-+ */
-+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
-+                           target_ulong pc, void *host_pc);
- /**
-  * DisasJumpType:
-@@ -XXX,XX +XXX,XX @@ typedef struct TranslatorOps {
- /**
-  * translator_loop:
-- * @ops: Target-specific operations.
-- * @db: Disassembly context.
-  * @cpu: Target vCPU.
-  * @tb: Translation block.
-  * @max_insns: Maximum number of insns to translate.
-+ * @pc: guest virtual program counter address
-+ * @host_pc: host physical program counter address
-+ * @ops: Target-specific operations.
-+ * @db: Disassembly context.
-  *
-  * Generic translator loop.
-  *
-@@ -XXX,XX +XXX,XX @@ typedef struct TranslatorOps {
-  * - When single-stepping is enabled (system-wide or on the current vCPU).
-  * - When too many instructions have been translated.
-  */
--void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
--                     CPUState *cpu, TranslationBlock *tb, int max_insns);
-+void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
-+                     target_ulong pc, void *host_pc,
-+                     const TranslatorOps *ops, DisasContextBase *db);
- void translator_loop_temp_check(DisasContextBase *db);
 diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/translate-all.c
 +++ b/accel/tcg/translate-all.c
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ void page_init(void)
  #include "exec/cputlb.h"
  #include "exec/translate-all.h"
 +#include "exec/translator.h"
  #include "qemu/bitmap.h"
  #include "qemu/qemu-print.h"
  #include "qemu/timer.h"
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
      TCGProfile *prof = &tcg_ctx->prof;
      int64_t ti;
  #endif
 +    void *host_pc;
      assert_memory_lock();
      qemu_thread_jit_write();
 -    phys_pc = get_page_addr_code(env, pc);
 +    phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
      if (phys_pc == -1) {
          /* Generate a one-shot TB with 1 insn in it */
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
      tcg_func_start(tcg_ctx);
      tcg_ctx->cpu = env_cpu(env);
 -    gen_intermediate_code(cpu, tb, max_insns);
 +    gen_intermediate_code(cpu, tb, max_insns, pc, host_pc);
      assert(tb->size != 0);
      tcg_ctx->cpu = NULL;
      max_insns = tb->icount;
 diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/translator.c
 +++ b/accel/tcg/translator.c
@@ -XXX,XX +XXX,XX @@ static inline void translator_page_protect(DisasContextBase *dcbase,
  #endif
  }
--void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
+-static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
--                     CPUState *cpu, TranslationBlock *tb, int max_insns)
++static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
 +void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
 +                     target_ulong pc, void *host_pc,
 +                     const TranslatorOps *ops, DisasContextBase *db)
  {
-     uint32_t cflags = tb_cflags(tb);
+     PageDesc *pd;
-     bool plugin_enabled;
+     void **lp;
+@@ -XXX,XX +XXX,XX @@ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
-     /* Initialize DisasContext */
-     db->tb = tb;
+ static inline PageDesc *page_find(tb_page_addr_t index)
 -    db->pc_first = tb->pc;
 -    db->pc_next = db->pc_first;
 +    db->pc_first = pc;
 +    db->pc_next = pc;
      db->is_jmp = DISAS_NEXT;
      db->num_insns = 0;
      db->max_insns = max_insns;
 diff --git a/target/alpha/translate.c b/target/alpha/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/alpha/translate.c
 +++ b/target/alpha/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps alpha_tr_ops = {
      .disas_log          = alpha_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
-     DisasContext dc;
+-    return page_find_alloc(index, 0);
--    translator_loop(&alpha_tr_ops, &dc.base, cpu, tb, max_insns);
++    return page_find_alloc(index, false);
 +    translator_loop(cpu, tb, max_insns, pc, host_pc, &alpha_tr_ops, &dc.base);
  }
- void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb,
+ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
-diff --git a/target/arm/translate.c b/target/arm/translate.c
+-                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
-index XXXXXXX..XXXXXXX 100644
++                           PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc);
---- a/target/arm/translate.c
-+++ b/target/arm/translate.c
+ /* In user-mode page locks aren't used; mmap_lock is enough */
-@@ -XXX,XX +XXX,XX @@ static const TranslatorOps thumb_translator_ops = {
+ #ifdef CONFIG_USER_ONLY
- };
+@@ -XXX,XX +XXX,XX @@ static inline void page_unlock(PageDesc *pd)
+ /* lock the page(s) of a TB in the correct acquisition order */
- /* generate intermediate code for basic block 'tb'.  */
+ static inline void page_lock_tb(const TranslationBlock *tb)
 -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
-     DisasContext dc = { };
+-    page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
-     const TranslatorOps *ops = &arm_translator_ops;
++    page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], false);
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
      }
  #endif
 -    translator_loop(ops, &dc.base, cpu, tb, max_insns);
 +    translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
  }
- void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
+ static inline void page_unlock_tb(const TranslationBlock *tb)
-diff --git a/target/avr/translate.c b/target/avr/translate.c
+@@ -XXX,XX +XXX,XX @@ void page_collection_unlock(struct page_collection *set)
-index XXXXXXX..XXXXXXX 100644
+ #endif /* !CONFIG_USER_ONLY */
---- a/target/avr/translate.c
-+++ b/target/avr/translate.c
+ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
-@@ -XXX,XX +XXX,XX @@ static const TranslatorOps avr_tr_ops = {
+-                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
-     .disas_log          = avr_tr_disas_log,
++                           PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc)
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
-     DisasContext dc = { };
+     PageDesc *p1, *p2;
--    translator_loop(&avr_tr_ops, &dc.base, cs, tb, max_insns);
+     tb_page_addr_t page1;
-+    translator_loop(cs, tb, max_insns, pc, host_pc, &avr_tr_ops, &dc.base);
+@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
- }
+      * Note that inserting into the hash table first isn't an option, since
+      * we can only insert TBs that are fully initialized.
- void restore_state_to_opc(CPUAVRState *env, TranslationBlock *tb,
+      */
-diff --git a/target/cris/translate.c b/target/cris/translate.c
+-    page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
-index XXXXXXX..XXXXXXX 100644
++    page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
---- a/target/cris/translate.c
+     tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
-+++ b/target/cris/translate.c
+     if (p2) {
-@@ -XXX,XX +XXX,XX @@ static const TranslatorOps cris_tr_ops = {
+         tb_page_add(p2, tb, 1, phys_page2);
-     .disas_log          = cris_tr_disas_log,
+@@ -XXX,XX +XXX,XX @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
- };
+     for (addr = start, len = end - start;
+          len != 0;
--void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
-+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+-        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
-+                           target_ulong pc, void *host_pc)
++        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
- {
-     DisasContext dc;
+         /* If the write protection bit is set, then we invalidate
--    translator_loop(&cris_tr_ops, &dc.base, cs, tb, max_insns);
+            the code inside.  */
 +    translator_loop(cs, tb, max_insns, pc, host_pc, &cris_tr_ops, &dc.base);
  }
  void cris_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/hexagon/translate.c
 +++ b/target/hexagon/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hexagon_tr_ops = {
      .disas_log          = hexagon_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext ctx;
 -    translator_loop(&hexagon_tr_ops, &ctx.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc,
 +                    &hexagon_tr_ops, &ctx.base);
  }
  #define NAME_LEN               64
 diff --git a/target/hppa/translate.c b/target/hppa/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/hppa/translate.c
 +++ b/target/hppa/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hppa_tr_ops = {
      .disas_log          = hppa_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext ctx;
 -    translator_loop(&hppa_tr_ops, &ctx.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base);
  }
  void restore_state_to_opc(CPUHPPAState *env, TranslationBlock *tb,
 diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/i386/tcg/translate.c
 +++ b/target/i386/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps i386_tr_ops = {
  };
  /* generate intermediate code for basic block 'tb'.  */
 -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext dc;
 -    translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
 +    translator_loop(cpu, tb, max_insns, pc, host_pc, &i386_tr_ops, &dc.base);
  }
  void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
 diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/loongarch/translate.c
 +++ b/target/loongarch/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps loongarch_tr_ops = {
      .disas_log          = loongarch_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext ctx;
 -    translator_loop(&loongarch_tr_ops, &ctx.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc,
 +                    &loongarch_tr_ops, &ctx.base);
  }
  void loongarch_translate_init(void)
 diff --git a/target/m68k/translate.c b/target/m68k/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/m68k/translate.c
 +++ b/target/m68k/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps m68k_tr_ops = {
      .disas_log          = m68k_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext dc;
 -    translator_loop(&m68k_tr_ops, &dc.base, cpu, tb, max_insns);
 +    translator_loop(cpu, tb, max_insns, pc, host_pc, &m68k_tr_ops, &dc.base);
  }
  static double floatx80_to_double(CPUM68KState *env, uint16_t high, uint64_t low)
 diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/microblaze/translate.c
 +++ b/target/microblaze/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mb_tr_ops = {
      .disas_log          = mb_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext dc;
 -    translator_loop(&mb_tr_ops, &dc.base, cpu, tb, max_insns);
 +    translator_loop(cpu, tb, max_insns, pc, host_pc, &mb_tr_ops, &dc.base);
  }
  void mb_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/mips/tcg/translate.c
 +++ b/target/mips/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mips_tr_ops = {
      .disas_log          = mips_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext ctx;
 -    translator_loop(&mips_tr_ops, &ctx.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc, &mips_tr_ops, &ctx.base);
  }
  void mips_tcg_init(void)
 diff --git a/target/nios2/translate.c b/target/nios2/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/nios2/translate.c
 +++ b/target/nios2/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps nios2_tr_ops = {
      .disas_log          = nios2_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext dc;
 -    translator_loop(&nios2_tr_ops, &dc.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc, &nios2_tr_ops, &dc.base);
  }
  void nios2_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/openrisc/translate.c
 +++ b/target/openrisc/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps openrisc_tr_ops = {
      .disas_log          = openrisc_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext ctx;
 -    translator_loop(&openrisc_tr_ops, &ctx.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc,
 +                    &openrisc_tr_ops, &ctx.base);
  }
  void openrisc_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 diff --git a/target/ppc/translate.c b/target/ppc/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/ppc/translate.c
 +++ b/target/ppc/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps ppc_tr_ops = {
      .disas_log          = ppc_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext ctx;
 -    translator_loop(&ppc_tr_ops, &ctx.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc, &ppc_tr_ops, &ctx.base);
  }
  void restore_state_to_opc(CPUPPCState *env, TranslationBlock *tb,
 diff --git a/target/riscv/translate.c b/target/riscv/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/riscv/translate.c
 +++ b/target/riscv/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps riscv_tr_ops = {
      .disas_log          = riscv_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext ctx;
 -    translator_loop(&riscv_tr_ops, &ctx.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc, &riscv_tr_ops, &ctx.base);
  }
  void riscv_translate_init(void)
 diff --git a/target/rx/translate.c b/target/rx/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/rx/translate.c
 +++ b/target/rx/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps rx_tr_ops = {
      .disas_log          = rx_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext dc;
 -    translator_loop(&rx_tr_ops, &dc.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc, &rx_tr_ops, &dc.base);
  }
  void restore_state_to_opc(CPURXState *env, TranslationBlock *tb,
 diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/s390x/tcg/translate.c
 +++ b/target/s390x/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps s390x_tr_ops = {
      .disas_log          = s390x_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext dc;
 -    translator_loop(&s390x_tr_ops, &dc.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc, &s390x_tr_ops, &dc.base);
  }
  void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb,
 diff --git a/target/sh4/translate.c b/target/sh4/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/sh4/translate.c
 +++ b/target/sh4/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sh4_tr_ops = {
      .disas_log          = sh4_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext ctx;
 -    translator_loop(&sh4_tr_ops, &ctx.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc, &sh4_tr_ops, &ctx.base);
  }
  void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb,
 diff --git a/target/sparc/translate.c b/target/sparc/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/sparc/translate.c
 +++ b/target/sparc/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sparc_tr_ops = {
      .disas_log          = sparc_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext dc = {};
 -    translator_loop(&sparc_tr_ops, &dc.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc, &sparc_tr_ops, &dc.base);
  }
  void sparc_tcg_init(void)
 diff --git a/target/tricore/translate.c b/target/tricore/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/tricore/translate.c
 +++ b/target/tricore/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps tricore_tr_ops = {
  };
 -void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext ctx;
 -    translator_loop(&tricore_tr_ops, &ctx.base, cs, tb, max_insns);
 +    translator_loop(cs, tb, max_insns, pc, host_pc,
 +                    &tricore_tr_ops, &ctx.base);
  }
  void
 diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/xtensa/translate.c
 +++ b/target/xtensa/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps xtensa_translator_ops = {
      .disas_log          = xtensa_tr_disas_log,
  };
 -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
 +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
 +                           target_ulong pc, void *host_pc)
  {
      DisasContext dc = {};
 -    translator_loop(&xtensa_translator_ops, &dc.base, cpu, tb, max_insns);
 +    translator_loop(cpu, tb, max_insns, pc, host_pc,
 +                    &xtensa_translator_ops, &dc.base);
  }
  void xtensa_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 --
 .34.1

-[PULL 2/4] target/avr: Call avr_cpu_do_interrupt directly
+[PULL 12/20] accel/tcg: Use DisasContextBase in plugin_gen_tb_start
-There is no need to go through cc->tcg_ops when
+Use the pc coming from db->pc_first rather than the TB.
 we know what value that must have.
-Reviewed-by: Michael Rolnik <mrolnik@gmail.com>
+Use the cached host_addr rather than re-computing for the
-Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
+first page.  We still need a separate lookup for the second
 page because it won't be computed for DisasContextBase until
 the translator actually performs a read from the page.
 Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- target/avr/helper.c | 5 ++---
+ include/exec/plugin-gen.h |  7 ++++---
-file changed, 2 insertions(+), 3 deletions(-)
+ accel/tcg/plugin-gen.c    | 22 +++++++++++-----------
  accel/tcg/translator.c    |  2 +-
 files changed, 16 insertions(+), 15 deletions(-)
-diff --git a/target/avr/helper.c b/target/avr/helper.c
+diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h
 index XXXXXXX..XXXXXXX 100644
---- a/target/avr/helper.c
+--- a/include/exec/plugin-gen.h
-+++ b/target/avr/helper.c
++++ b/include/exec/plugin-gen.h
-@@ -XXX,XX +XXX,XX @@
+@@ -XXX,XX +XXX,XX @@ struct DisasContextBase;
- bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
  #ifdef CONFIG_PLUGIN
 -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress);
 +bool plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db,
 +                         bool supress);
  void plugin_gen_tb_end(CPUState *cpu);
  void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
  void plugin_gen_insn_end(void);
@@ -XXX,XX +XXX,XX @@ static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
  #else /* !CONFIG_PLUGIN */
 -static inline
 -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress)
 +static inline bool
 +plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db, bool sup)
  {
      return false;
  }
 diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/plugin-gen.c
 +++ b/accel/tcg/plugin-gen.c
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb)
      pr_ops();
  }
 -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_only)
 +bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db,
 +                         bool mem_only)
  {
      bool ret = false;
--    CPUClass *cc = CPU_GET_CLASS(cs);
-     AVRCPU *cpu = AVR_CPU(cs);
+@@ -XXX,XX +XXX,XX @@ bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_onl
-     CPUAVRState *env = &cpu->env;
+         ret = true;
-     if (interrupt_request & CPU_INTERRUPT_RESET) {
-         if (cpu_interrupts_enabled(env)) {
+-        ptb->vaddr = tb->pc;
-             cs->exception_index = EXCP_RESET;
++        ptb->vaddr = db->pc_first;
--            cc->tcg_ops->do_interrupt(cs);
+         ptb->vaddr2 = -1;
-+            avr_cpu_do_interrupt(cs);
+-        get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1);
++        ptb->haddr1 = db->host_addr[0];
-             cs->interrupt_request &= ~CPU_INTERRUPT_RESET;
+         ptb->haddr2 = NULL;
+         ptb->mem_only = mem_only;
-@@ -XXX,XX +XXX,XX @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
-         if (cpu_interrupts_enabled(env) && env->intsrc != 0) {
+@@ -XXX,XX +XXX,XX @@ void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db)
-             int index = ctz32(env->intsrc);
+      * Note that we skip this when haddr1 == NULL, e.g. when we're
-             cs->exception_index = EXCP_INT(index);
+      * fetching instructions from a region not backed by RAM.
--            cc->tcg_ops->do_interrupt(cs);
+      */
-+            avr_cpu_do_interrupt(cs);
+-    if (likely(ptb->haddr1 != NULL && ptb->vaddr2 == -1) &&
+-        unlikely((db->pc_next & TARGET_PAGE_MASK) !=
-             env->intsrc &= env->intsrc - 1; /* clear the interrupt */
+-                 (db->pc_first & TARGET_PAGE_MASK))) {
-             if (!env->intsrc) {
+-        get_page_addr_code_hostp(cpu->env_ptr, db->pc_next,
 -                                 &ptb->haddr2);
 -        ptb->vaddr2 = db->pc_next;
 -    }
 -    if (likely(ptb->vaddr2 == -1)) {
 +    if (ptb->haddr1 == NULL) {
 +        pinsn->haddr = NULL;
 +    } else if (is_same_page(db, db->pc_next)) {
          pinsn->haddr = ptb->haddr1 + pinsn->vaddr - ptb->vaddr;
      } else {
 +        if (ptb->vaddr2 == -1) {
 +            ptb->vaddr2 = TARGET_PAGE_ALIGN(db->pc_first);
 +            get_page_addr_code_hostp(cpu->env_ptr, ptb->vaddr2, &ptb->haddr2);
 +        }
          pinsn->haddr = ptb->haddr2 + pinsn->vaddr - ptb->vaddr2;
      }
  }
 diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/translator.c
 +++ b/accel/tcg/translator.c
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
      ops->tb_start(db, cpu);
      tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
 -    plugin_enabled = plugin_gen_tb_start(cpu, tb, cflags & CF_MEMI_ONLY);
 +    plugin_enabled = plugin_gen_tb_start(cpu, db, cflags & CF_MEMI_ONLY);
      while (true) {
          db->num_insns++;
 --
 .34.1

-[PULL 09/20] accel/tcg: Unlock mmap_lock after longjmp
+[PULL 13/20] accel/tcg: Do not align tb->page_addr[0]
-The mmap_lock is held around tb_gen_code.  While the comment
+Let tb->page_addr[0] contain the address of the first byte of the
-is correct that the lock is dropped when tb_gen_code runs out
+translated block, rather than the address of the page containing the
-of memory, the lock is *not* dropped when an exception is
+start of the translated block.  We need to recover this value anyway
-raised reading code for translation.
+at various points, and it is easier to discard a page offset when it
 is not needed, which happens naturally via the existing find_page shift.
-Acked-by: Alistair Francis <alistair.francis@wdc.com>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
 Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
 Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- accel/tcg/cpu-exec.c  | 12 ++++++------
+ accel/tcg/cpu-exec.c      | 16 ++++++++--------
- accel/tcg/user-exec.c |  3 ---
+ accel/tcg/cputlb.c        |  3 ++-
-files changed, 6 insertions(+), 9 deletions(-)
+ accel/tcg/translate-all.c |  9 +++++----
 files changed, 15 insertions(+), 13 deletions(-)
 diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/cpu-exec.c
 +++ b/accel/tcg/cpu-exec.c
-@@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu)
+@@ -XXX,XX +XXX,XX @@ struct tb_desc {
-         cpu_tb_exec(cpu, tb, &tb_exit);
+     target_ulong pc;
-         cpu_exec_exit(cpu);
+     target_ulong cs_base;
      CPUArchState *env;
 -    tb_page_addr_t phys_page1;
 +    tb_page_addr_t page_addr0;
      uint32_t flags;
      uint32_t cflags;
      uint32_t trace_vcpu_dstate;
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
      const struct tb_desc *desc = d;
      if (tb->pc == desc->pc &&
 -        tb->page_addr[0] == desc->phys_page1 &&
 +        tb->page_addr[0] == desc->page_addr0 &&
          tb->cs_base == desc->cs_base &&
          tb->flags == desc->flags &&
          tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
          if (tb->page_addr[1] == -1) {
              return true;
          } else {
 -            tb_page_addr_t phys_page2;
 -            target_ulong virt_page2;
 +            tb_page_addr_t phys_page1;
 +            target_ulong virt_page1;
              /*
               * We know that the first page matched, and an otherwise valid TB
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
               * is different for the new TB.  Therefore any exception raised
               * here by the faulting lookup is not premature.
               */
 -            virt_page2 = TARGET_PAGE_ALIGN(desc->pc);
 -            phys_page2 = get_page_addr_code(desc->env, virt_page2);
 -            if (tb->page_addr[1] == phys_page2) {
 +            virt_page1 = TARGET_PAGE_ALIGN(desc->pc);
 +            phys_page1 = get_page_addr_code(desc->env, virt_page1);
 +            if (tb->page_addr[1] == phys_page1) {
                  return true;
              }
          }
@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
      if (phys_pc == -1) {
          return NULL;
      }
 -    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
 +    desc.page_addr0 = phys_pc;
      h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
      return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
  }
 diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/cputlb.c
 +++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
     can be detected */
  void tlb_protect_code(ram_addr_t ram_addr)
  {
 -    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
 +    cpu_physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
 +                                             TARGET_PAGE_SIZE,
                                               DIRTY_MEMORY_CODE);
  }
 diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/translate-all.c
 +++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
      qemu_spin_unlock(&tb->jmp_lock);
      /* remove the TB from the hash list */
 -    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
 +    phys_pc = tb->page_addr[0];
      h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
                       tb->trace_vcpu_dstate);
      if (!qht_remove(&tb_ctx.htable, tb, h)) {
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
       * we can only insert TBs that are fully initialized.
       */
      page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
 -    tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
 +    tb_page_add(p, tb, 0, phys_pc);
      if (p2) {
          tb_page_add(p2, tb, 1, phys_page2);
      } else {
--        /*
+@@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
--         * The mmap_lock is dropped by tb_gen_code if it runs out of
+         if (n == 0) {
--         * memory.
+             /* NOTE: tb_end may be after the end of the page, but
--         */
+                it is not a problem */
- #ifndef CONFIG_SOFTMMU
+-            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
-         clear_helper_retaddr();
++            tb_start = tb->page_addr[0];
--        tcg_debug_assert(!have_mmap_lock());
+             tb_end = tb_start + tb->size;
-+        if (have_mmap_lock()) {
+         } else {
-+            mmap_unlock();
+             tb_start = tb->page_addr[1];
-+        }
+-            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
- #endif
++            tb_end = tb_start + ((tb->page_addr[0] + tb->size)
-         if (qemu_mutex_iothread_locked()) {
++                                 & ~TARGET_PAGE_MASK);
-             qemu_mutex_unlock_iothread();
+         }
-@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
+         if (!(tb_end <= start || tb_start >= end)) {
+ #ifdef TARGET_HAS_PRECISE_SMC
  #ifndef CONFIG_SOFTMMU
          clear_helper_retaddr();
 -        tcg_debug_assert(!have_mmap_lock());
 +        if (have_mmap_lock()) {
 +            mmap_unlock();
 +        }
  #endif
          if (qemu_mutex_iothread_locked()) {
              qemu_mutex_unlock_iothread();
 diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/user-exec.c
 +++ b/accel/tcg/user-exec.c
@@ -XXX,XX +XXX,XX @@ MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
           * (and if the translator doesn't handle page boundaries correctly
           * there's little we can do about that here).  Therefore, do not
           * trigger the unwinder.
 -         *
 -         * Like tb_gen_code, release the memory lock before cpu_loop_exit.
           */
 -        mmap_unlock();
          *pc = 0;
          return MMU_INST_FETCH;
      }
 --
 .34.1

-[PULL 12/20] accel/tcg: Use probe_access_internal for softmmu get_page_addr_code_hostp
+[PULL 14/20] accel/tcg: Inline tb_flush_jmp_cache
-Simplify the implementation of get_page_addr_code_hostp
+This function has two users, who use it incompatibly.
-by reusing the existing probe_access infrastructure.
+In tlb_flush_page_by_mmuidx_async_0, when flushing a
 single page, we need to flush exactly two pages.
 In tlb_flush_range_by_mmuidx_async_0, when flushing a
 range of pages, we need to flush N+1 pages.
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
+This avoids double-flushing of jmp cache pages in a range.
-Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
 Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- accel/tcg/cputlb.c | 76 ++++++++++++++++------------------------------
+ accel/tcg/cputlb.c | 25 ++++++++++++++-----------
-file changed, 26 insertions(+), 50 deletions(-)
+file changed, 14 insertions(+), 11 deletions(-)
 diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/cputlb.c
 +++ b/accel/tcg/cputlb.c
-@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
+@@ -XXX,XX +XXX,XX @@ static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
-   victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
+     }
-                  (ADDR) & TARGET_PAGE_MASK)
+ }
--/*
+-static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
 - * Return a ram_addr_t for the virtual address for execution.
 - *
 - * Return -1 if we can't translate and execute from an entire page
 - * of RAM.  This will force us to execute by loading and translating
 - * one insn at a time, without caching.
 - *
 - * NOTE: This function will trigger an exception if the page is
 - * not executable.
 - */
 -tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
 -                                        void **hostp)
 -{
--    uintptr_t mmu_idx = cpu_mmu_index(env, true);
+-    /* Discard jump cache entries for any tb which might potentially
--    uintptr_t index = tlb_index(env, mmu_idx, addr);
+-       overlap the flushed page.  */
--    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
+-    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
--    void *p;
+-    tb_jmp_cache_clear_page(cpu, addr);
 -
 -    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
 -        if (!VICTIM_TLB_HIT(addr_code, addr)) {
 -            tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
 -            index = tlb_index(env, mmu_idx, addr);
 -            entry = tlb_entry(env, mmu_idx, addr);
 -
 -            if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
 -                /*
 -                 * The MMU protection covers a smaller range than a target
 -                 * page, so we must redo the MMU check for every insn.
 -                 */
 -                return -1;
 -            }
 -        }
 -        assert(tlb_hit(entry->addr_code, addr));
 -    }
 -
 -    if (unlikely(entry->addr_code & TLB_MMIO)) {
 -        /* The region is not backed by RAM.  */
 -        if (hostp) {
 -            *hostp = NULL;
 -        }
 -        return -1;
 -    }
 -
 -    p = (void *)((uintptr_t)addr + entry->addend);
 -    if (hostp) {
 -        *hostp = p;
 -    }
 -    return qemu_ram_addr_from_host_nofail(p);
 -}
 -
- static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
+ /**
-                            CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
+  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
- {
+  * @desc: The CPUTLBDesc portion of the TLB
-@@ -XXX,XX +XXX,XX @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
+@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
-     return flags ? NULL : host;
+     }
      qemu_spin_unlock(&env_tlb(env)->c.lock);
 -    tb_flush_jmp_cache(cpu, addr);
 +    /*
 +     * Discard jump cache entries for any tb which might potentially
 +     * overlap the flushed page, which includes the previous.
 +     */
 +    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
 +    tb_jmp_cache_clear_page(cpu, addr);
  }
-+/*
+ /**
-+ * Return a ram_addr_t for the virtual address for execution.
+@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
-+ *
+         return;
-+ * Return -1 if we can't translate and execute from an entire page
+     }
-+ * of RAM.  This will force us to execute by loading and translating
-+ * one insn at a time, without caching.
+-    for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) {
-+ *
+-        tb_flush_jmp_cache(cpu, d.addr + i);
-+ * NOTE: This function will trigger an exception if the page is
++    /*
-+ * not executable.
++     * Discard jump cache entries for any tb which might potentially
-+ */
++     * overlap the flushed pages, which includes the previous.
-+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
++     */
-+                                        void **hostp)
++    d.addr -= TARGET_PAGE_SIZE;
-+{
++    for (target_ulong i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) {
-+    void *p;
++        tb_jmp_cache_clear_page(cpu, d.addr);
-+
++        d.addr += TARGET_PAGE_SIZE;
-+    (void)probe_access_internal(env, addr, 1, MMU_INST_FETCH,
+     }
-+                                cpu_mmu_index(env, true), false, &p, 0);
+ }
-+    if (p == NULL) {
 +        return -1;
 +    }
 +    if (hostp) {
 +        *hostp = p;
 +    }
 +    return qemu_ram_addr_from_host_nofail(p);
 +}
 +
  #ifdef CONFIG_PLUGIN
  /*
   * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
 --
 .34.1

-[PULL 18/20] target/i386: Make translator stop before the end of a page
+[PULL 15/20] include/hw/core: Create struct CPUJumpCache
-From: Ilya Leoshkevich <iii@linux.ibm.com>
+Wrap the bare TranslationBlock pointer into a structure.
-Right now translator stops right *after* the end of a page, which
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
-breaks reporting of fault locations when the last instruction of a
+Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
 multi-insn translation block crosses a page boundary.
 An implementation, like the one arm and s390x have, would require an
 i386 length disassembler, which is burdensome to maintain. Another
 alternative would be to single-step at the end of a guest page, but
 this may come with a performance impact.
 Fix by snapshotting disassembly state and restoring it after we figure
 out we crossed a page boundary. This includes rolling back cc_op
 updates and emitted ops.
 Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1143
 Message-Id: <20220817150506.592862-4-iii@linux.ibm.com>
 [rth: Simplify end-of-insn cross-page checks.]
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- target/i386/tcg/translate.c      | 64 ++++++++++++++++-----------
+ accel/tcg/tb-hash.h       |  1 +
- tests/tcg/x86_64/noexec.c        | 75 ++++++++++++++++++++++++++++++++
+ accel/tcg/tb-jmp-cache.h  | 24 ++++++++++++++++++++++++
- tests/tcg/x86_64/Makefile.target |  3 +-
+ include/exec/cpu-common.h |  1 +
-files changed, 116 insertions(+), 26 deletions(-)
+ include/hw/core/cpu.h     | 15 +--------------
- create mode 100644 tests/tcg/x86_64/noexec.c
+ include/qemu/typedefs.h   |  1 +
  accel/stubs/tcg-stub.c    |  4 ++++
  accel/tcg/cpu-exec.c      | 10 +++++++---
  accel/tcg/cputlb.c        |  9 +++++----
  accel/tcg/translate-all.c | 28 +++++++++++++++++++++++++---
  hw/core/cpu-common.c      |  3 +--
  plugins/core.c            |  2 +-
  trace/control-target.c    |  2 +-
 files changed, 72 insertions(+), 28 deletions(-)
  create mode 100644 accel/tcg/tb-jmp-cache.h
-diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
+diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
 index XXXXXXX..XXXXXXX 100644
---- a/target/i386/tcg/translate.c
+--- a/accel/tcg/tb-hash.h
-+++ b/target/i386/tcg/translate.c
++++ b/accel/tcg/tb-hash.h
-@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
+@@ -XXX,XX +XXX,XX @@
-     TCGv_i64 tmp1_i64;
+ #include "exec/cpu-defs.h"
+ #include "exec/exec-all.h"
-     sigjmp_buf jmpbuf;
+ #include "qemu/xxhash.h"
-+    TCGOp *prev_insn_end;
++#include "tb-jmp-cache.h"
- } DisasContext;
+ #ifdef CONFIG_SOFTMMU
- /* The environment in which user-only runs is constrained. */
-@@ -XXX,XX +XXX,XX @@ static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
+diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
  {
      uint64_t pc = s->pc;
 +    /* This is a subsequent insn that crosses a page boundary.  */
 +    if (s->base.num_insns > 1 &&
 +        !is_same_page(&s->base, s->pc + num_bytes - 1)) {
 +        siglongjmp(s->jmpbuf, 2);
 +    }
 +
      s->pc += num_bytes;
      if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
          /* If the instruction's 16th byte is on a different page than the 1st, a
@@ -XXX,XX +XXX,XX @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
      int modrm, reg, rm, mod, op, opreg, val;
      target_ulong next_eip, tval;
      target_ulong pc_start = s->base.pc_next;
 +    bool orig_cc_op_dirty = s->cc_op_dirty;
 +    CCOp orig_cc_op = s->cc_op;
      s->pc_start = s->pc = pc_start;
      s->override = -1;
@@ -XXX,XX +XXX,XX @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
      s->rip_offset = 0; /* for relative ip address */
      s->vex_l = 0;
      s->vex_v = 0;
 -    if (sigsetjmp(s->jmpbuf, 0) != 0) {
 +    switch (sigsetjmp(s->jmpbuf, 0)) {
 +    case 0:
 +        break;
 +    case 1:
          gen_exception_gpf(s);
          return s->pc;
 +    case 2:
 +        /* Restore state that may affect the next instruction. */
 +        s->cc_op_dirty = orig_cc_op_dirty;
 +        s->cc_op = orig_cc_op;
 +        s->base.num_insns--;
 +        tcg_remove_ops_after(s->prev_insn_end);
 +        s->base.is_jmp = DISAS_TOO_MANY;
 +        return pc_start;
 +    default:
 +        g_assert_not_reached();
      }
      prefixes = 0;
@@ -XXX,XX +XXX,XX @@ static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
  {
      DisasContext *dc = container_of(dcbase, DisasContext, base);
 +    dc->prev_insn_end = tcg_last_op();
      tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
  }
@@ -XXX,XX +XXX,XX @@ static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
  #endif
      pc_next = disas_insn(dc, cpu);
 -
 -    if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
 -        /* if single step mode, we generate only one instruction and
 -           generate an exception */
 -        /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
 -           the flag and abort the translation to give the irqs a
 -           chance to happen */
 -        dc->base.is_jmp = DISAS_TOO_MANY;
 -    } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
 -               && ((pc_next & TARGET_PAGE_MASK)
 -                   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
 -                       & TARGET_PAGE_MASK)
 -                   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
 -        /* Do not cross the boundary of the pages in icount mode,
 -           it can cause an exception. Do it only when boundary is
 -           crossed by the first instruction in the block.
 -           If current instruction already crossed the bound - it's ok,
 -           because an exception hasn't stopped this code.
 -         */
 -        dc->base.is_jmp = DISAS_TOO_MANY;
 -    } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
 -        dc->base.is_jmp = DISAS_TOO_MANY;
 -    }
 -
      dc->base.pc_next = pc_next;
 +
 +    if (dc->base.is_jmp == DISAS_NEXT) {
 +        if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
 +            /*
 +             * If single step mode, we generate only one instruction and
 +             * generate an exception.
 +             * If irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
 +             * the flag and abort the translation to give the irqs a
 +             * chance to happen.
 +             */
 +            dc->base.is_jmp = DISAS_TOO_MANY;
 +        } else if (!is_same_page(&dc->base, pc_next)) {
 +            dc->base.is_jmp = DISAS_TOO_MANY;
 +        }
 +    }
  }
  static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
 diff --git a/tests/tcg/x86_64/noexec.c b/tests/tcg/x86_64/noexec.c
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
-+++ b/tests/tcg/x86_64/noexec.c
++++ b/accel/tcg/tb-jmp-cache.h
 @@ -XXX,XX +XXX,XX @@
-+#include "../multiarch/noexec.c.inc"
++/*
-+
++ * The per-CPU TranslationBlock jump cache.
-+static void *arch_mcontext_pc(const mcontext_t *ctx)
++ *
-+{
++ *  Copyright (c) 2003 Fabrice Bellard
-+    return (void *)ctx->gregs[REG_RIP];
++ *
-+}
++ * SPDX-License-Identifier: GPL-2.0-or-later
-+
++ */
-+int arch_mcontext_arg(const mcontext_t *ctx)
++
-+{
++#ifndef ACCEL_TCG_TB_JMP_CACHE_H
-+    return ctx->gregs[REG_RDI];
++#define ACCEL_TCG_TB_JMP_CACHE_H
-+}
++
-+
++#define TB_JMP_CACHE_BITS 12
-+static void arch_flush(void *p, int len)
++#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
 +
 +/*
 + * Accessed in parallel; all accesses to 'tb' must be atomic.
 + */
 +struct CPUJumpCache {
 +    struct {
 +        TranslationBlock *tb;
 +    } array[TB_JMP_CACHE_SIZE];
 +};
 +
 +#endif /* ACCEL_TCG_TB_JMP_CACHE_H */
 diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
 index XXXXXXX..XXXXXXX 100644
 --- a/include/exec/cpu-common.h
 +++ b/include/exec/cpu-common.h
@@ -XXX,XX +XXX,XX @@ void cpu_list_unlock(void);
  unsigned int cpu_list_generation_id_get(void);
  void tcg_flush_softmmu_tlb(CPUState *cs);
 +void tcg_flush_jmp_cache(CPUState *cs);
  void tcg_iommu_init_notifier_list(CPUState *cpu);
  void tcg_iommu_free_notifier_list(CPUState *cpu);
 diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
 index XXXXXXX..XXXXXXX 100644
 --- a/include/hw/core/cpu.h
 +++ b/include/hw/core/cpu.h
@@ -XXX,XX +XXX,XX @@ struct kvm_run;
  struct hax_vcpu_state;
  struct hvf_vcpu_state;
 -#define TB_JMP_CACHE_BITS 12
 -#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
 -
  /* work queue */
  /* The union type allows passing of 64 bit target pointers on 32 bit
@@ -XXX,XX +XXX,XX @@ struct CPUState {
      CPUArchState *env_ptr;
      IcountDecr *icount_decr_ptr;
 -    /* Accessed in parallel; all accesses must be atomic */
 -    TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
 +    CPUJumpCache *tb_jmp_cache;
      struct GDBRegisterState *gdb_regs;
      int gdb_num_regs;
@@ -XXX,XX +XXX,XX @@ extern CPUTailQ cpus;
  extern __thread CPUState *current_cpu;
 -static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
 -{
 -    unsigned int i;
 -
 -    for (i = 0; i < TB_JMP_CACHE_SIZE; i++) {
 -        qatomic_set(&cpu->tb_jmp_cache[i], NULL);
 -    }
 -}
 -
  /**
   * qemu_tcg_mttcg_enabled:
   * Check whether we are running MultiThread TCG or not.
 diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
 index XXXXXXX..XXXXXXX 100644
 --- a/include/qemu/typedefs.h
 +++ b/include/qemu/typedefs.h
@@ -XXX,XX +XXX,XX @@ typedef struct CoMutex CoMutex;
  typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
  typedef struct CPUAddressSpace CPUAddressSpace;
  typedef struct CPUArchState CPUArchState;
 +typedef struct CPUJumpCache CPUJumpCache;
  typedef struct CPUState CPUState;
  typedef struct CPUTLBEntryFull CPUTLBEntryFull;
  typedef struct DeviceListener DeviceListener;
 diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/stubs/tcg-stub.c
 +++ b/accel/stubs/tcg-stub.c
@@ -XXX,XX +XXX,XX @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
  {
  }
 +void tcg_flush_jmp_cache(CPUState *cpu)
 +{
 +}
 +
-+extern char noexec_1[];
+ int probe_access_flags(CPUArchState *env, target_ulong addr,
-+extern char noexec_2[];
+                        MMUAccessType access_type, int mmu_idx,
-+extern char noexec_end[];
+                        bool nonfault, void **phost, uintptr_t retaddr)
-+
+diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
-+asm("noexec_1:\n"
+index XXXXXXX..XXXXXXX 100644
-+    "    movq $1,%rdi\n"    /* %rdi is 0 on entry, set 1. */
+--- a/accel/tcg/cpu-exec.c
-+    "noexec_2:\n"
++++ b/accel/tcg/cpu-exec.c
-+    "    movq $2,%rdi\n"    /* %rdi is 0/1; set 2. */
+@@ -XXX,XX +XXX,XX @@
-+    "    ret\n"
+ #include "sysemu/replay.h"
-+    "noexec_end:");
+ #include "sysemu/tcg.h"
-+
+ #include "exec/helper-proto.h"
-+int main(void)
++#include "tb-jmp-cache.h"
  #include "tb-hash.h"
  #include "tb-context.h"
  #include "internal.h"
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
      tcg_debug_assert(!(cflags & CF_INVALID));
      hash = tb_jmp_cache_hash_func(pc);
 -    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
 +    tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb);
      if (likely(tb &&
                 tb->pc == pc &&
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
      if (tb == NULL) {
          return NULL;
      }
 -    qatomic_set(&cpu->tb_jmp_cache[hash], tb);
 +    qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb);
      return tb;
  }
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
              tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
              if (tb == NULL) {
 +                uint32_t h;
 +
                  mmap_lock();
                  tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
                  mmap_unlock();
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
                   * We add the TB in the virtual pc hash table
                   * for the fast lookup
                   */
 -                qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
 +                h = tb_jmp_cache_hash_func(pc);
 +                qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb);
              }
  #ifndef CONFIG_USER_ONLY
 diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/cputlb.c
 +++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
  static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
  {
 -    unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
 +    int i, i0 = tb_jmp_cache_hash_page(page_addr);
 +    CPUJumpCache *jc = cpu->tb_jmp_cache;
      for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
 -        qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
 +        qatomic_set(&jc->array[i0 + i].tb, NULL);
      }
  }
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
      qemu_spin_unlock(&env_tlb(env)->c.lock);
 -    cpu_tb_jmp_cache_clear(cpu);
 +    tcg_flush_jmp_cache(cpu);
      if (to_clean == ALL_MMUIDX_BITS) {
          qatomic_set(&env_tlb(env)->c.full_flush_count,
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
       * longer to clear each entry individually than it will to clear it all.
       */
      if (d.len >= (TARGET_PAGE_SIZE * TB_JMP_CACHE_SIZE)) {
 -        cpu_tb_jmp_cache_clear(cpu);
 +        tcg_flush_jmp_cache(cpu);
          return;
      }
 diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/translate-all.c
 +++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@
  #include "sysemu/tcg.h"
  #include "qapi/error.h"
  #include "hw/core/tcg-cpu-ops.h"
 +#include "tb-jmp-cache.h"
  #include "tb-hash.h"
  #include "tb-context.h"
  #include "internal.h"
@@ -XXX,XX +XXX,XX @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
      }
      CPU_FOREACH(cpu) {
 -        cpu_tb_jmp_cache_clear(cpu);
 +        tcg_flush_jmp_cache(cpu);
      }
      qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
      /* remove the TB from the hash list */
      h = tb_jmp_cache_hash_func(tb->pc);
      CPU_FOREACH(cpu) {
 -        if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
 -            qatomic_set(&cpu->tb_jmp_cache[h], NULL);
 +        CPUJumpCache *jc = cpu->tb_jmp_cache;
 +        if (qatomic_read(&jc->array[h].tb) == tb) {
 +            qatomic_set(&jc->array[h].tb, NULL);
          }
      }
@@ -XXX,XX +XXX,XX @@ int page_unprotect(target_ulong address, uintptr_t pc)
  }
  #endif /* CONFIG_USER_ONLY */
 +/*
 + * Called by generic code at e.g. cpu reset after cpu creation,
 + * therefore we must be prepared to allocate the jump cache.
 + */
 +void tcg_flush_jmp_cache(CPUState *cpu)
 +{
-+    struct noexec_test noexec_tests[] = {
++    CPUJumpCache *jc = cpu->tb_jmp_cache;
-+        {
++
-+            .name = "fallthrough",
++    if (likely(jc)) {
-+            .test_code = noexec_1,
++        for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
-+            .test_len = noexec_end - noexec_1,
++            qatomic_set(&jc->array[i].tb, NULL);
-+            .page_ofs = noexec_1 - noexec_2,
++        }
-+            .entry_ofs = noexec_1 - noexec_2,
++    } else {
-+            .expected_si_ofs = 0,
++        /* This should happen once during realize, and thus never race. */
-+            .expected_pc_ofs = 0,
++        jc = g_new0(CPUJumpCache, 1);
-+            .expected_arg = 1,
++        jc = qatomic_xchg(&cpu->tb_jmp_cache, jc);
-+        },
++        assert(jc == NULL);
-+        {
++    }
 +            .name = "jump",
 +            .test_code = noexec_1,
 +            .test_len = noexec_end - noexec_1,
 +            .page_ofs = noexec_1 - noexec_2,
 +            .entry_ofs = 0,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = 0,
 +            .expected_arg = 0,
 +        },
 +        {
 +            .name = "fallthrough [cross]",
 +            .test_code = noexec_1,
 +            .test_len = noexec_end - noexec_1,
 +            .page_ofs = noexec_1 - noexec_2 - 2,
 +            .entry_ofs = noexec_1 - noexec_2 - 2,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = -2,
 +            .expected_arg = 1,
 +        },
 +        {
 +            .name = "jump [cross]",
 +            .test_code = noexec_1,
 +            .test_len = noexec_end - noexec_1,
 +            .page_ofs = noexec_1 - noexec_2 - 2,
 +            .entry_ofs = -2,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = -2,
 +            .expected_arg = 0,
 +        },
 +    };
 +
 +    return test_noexec(noexec_tests,
 +                       sizeof(noexec_tests) / sizeof(noexec_tests[0]));
 +}
-diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
++
-index XXXXXXX..XXXXXXX 100644
+ /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
---- a/tests/tcg/x86_64/Makefile.target
+ void tcg_flush_softmmu_tlb(CPUState *cs)
-+++ b/tests/tcg/x86_64/Makefile.target
+ {
-@@ -XXX,XX +XXX,XX @@ include $(SRC_PATH)/tests/tcg/i386/Makefile.target
+diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
+index XXXXXXX..XXXXXXX 100644
- ifeq ($(filter %-linux-user, $(TARGET)),$(TARGET))
+--- a/hw/core/cpu-common.c
- X86_64_TESTS += vsyscall
++++ b/hw/core/cpu-common.c
-+X86_64_TESTS += noexec
+@@ -XXX,XX +XXX,XX @@ static void cpu_common_reset(DeviceState *dev)
- TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
+     cpu->cflags_next_tb = -1;
- else
- TESTS=$(MULTIARCH_TESTS)
+     if (tcg_enabled()) {
-@@ -XXX,XX +XXX,XX @@ test-x86_64: LDFLAGS+=-lm -lc
+-        cpu_tb_jmp_cache_clear(cpu);
- test-x86_64: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h
+-
-     $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
++        tcg_flush_jmp_cache(cpu);
+         tcg_flush_softmmu_tlb(cpu);
--vsyscall: $(SRC_PATH)/tests/tcg/x86_64/vsyscall.c
+     }
-+%: $(SRC_PATH)/tests/tcg/x86_64/%.c
+ }
-     $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
+diff --git a/plugins/core.c b/plugins/core.c
 index XXXXXXX..XXXXXXX 100644
 --- a/plugins/core.c
 +++ b/plugins/core.c
@@ -XXX,XX +XXX,XX @@ struct qemu_plugin_ctx *plugin_id_to_ctx_locked(qemu_plugin_id_t id)
  static void plugin_cpu_update__async(CPUState *cpu, run_on_cpu_data data)
  {
      bitmap_copy(cpu->plugin_mask, &data.host_ulong, QEMU_PLUGIN_EV_MAX);
 -    cpu_tb_jmp_cache_clear(cpu);
 +    tcg_flush_jmp_cache(cpu);
  }
  static void plugin_cpu_update__locked(gpointer k, gpointer v, gpointer udata)
 diff --git a/trace/control-target.c b/trace/control-target.c
 index XXXXXXX..XXXXXXX 100644
 --- a/trace/control-target.c
 +++ b/trace/control-target.c
@@ -XXX,XX +XXX,XX @@ static void trace_event_synchronize_vcpu_state_dynamic(
  {
      bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed,
                  CPU_TRACE_DSTATE_MAX_EVENTS);
 -    cpu_tb_jmp_cache_clear(vcpu);
 +    tcg_flush_jmp_cache(vcpu);
  }
  void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
 --
 .34.1

-[PULL 01/20] linux-user/arm: Mark the commpage executable
+[PULL 16/20] hw/core: Add CPUClass.get_pc
-We're about to start validating PAGE_EXEC, which means
+Populate this new method for all targets.  Always match
-that we've got to mark the commpage executable.  We had
+the result that would be given by cpu_get_tb_cpu_state,
-been placing the commpage outside of reserved_va, which
+as we will want these values to correspond in the logs.
 was incorrect and led to an abort.
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
-Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
 Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (target/sparc)
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- linux-user/arm/target_cpu.h | 4 ++--
+Cc: Eduardo Habkost <eduardo@habkost.net> (supporter:Machine core)
- linux-user/elfload.c        | 6 +++++-
+Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> (supporter:Machine core)
-files changed, 7 insertions(+), 3 deletions(-)
+Cc: "Philippe Mathieu-Daudé" <f4bug@amsat.org> (reviewer:Machine core)
 Cc: Yanan Wang <wangyanan55@huawei.com> (reviewer:Machine core)
 Cc: Michael Rolnik <mrolnik@gmail.com> (maintainer:AVR TCG CPUs)
 Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com> (maintainer:CRIS TCG CPUs)
 Cc: Taylor Simpson <tsimpson@quicinc.com> (supporter:Hexagon TCG CPUs)
 Cc: Song Gao <gaosong@loongson.cn> (maintainer:LoongArch TCG CPUs)
 Cc: Xiaojuan Yang <yangxiaojuan@loongson.cn> (maintainer:LoongArch TCG CPUs)
 Cc: Laurent Vivier <laurent@vivier.eu> (maintainer:M68K TCG CPUs)
 Cc: Jiaxun Yang <jiaxun.yang@flygoat.com> (reviewer:MIPS TCG CPUs)
 Cc: Aleksandar Rikalo <aleksandar.rikalo@syrmia.com> (reviewer:MIPS TCG CPUs)
 Cc: Chris Wulff <crwulff@gmail.com> (maintainer:NiosII TCG CPUs)
 Cc: Marek Vasut <marex@denx.de> (maintainer:NiosII TCG CPUs)
 Cc: Stafford Horne <shorne@gmail.com> (odd fixer:OpenRISC TCG CPUs)
 Cc: Yoshinori Sato <ysato@users.sourceforge.jp> (reviewer:RENESAS RX CPUs)
 Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (maintainer:SPARC TCG CPUs)
 Cc: Bastian Koppelmann <kbastian@mail.uni-paderborn.de> (maintainer:TriCore TCG CPUs)
 Cc: Max Filippov <jcmvbkbc@gmail.com> (maintainer:Xtensa TCG CPUs)
 Cc: qemu-arm@nongnu.org (open list:ARM TCG CPUs)
 Cc: qemu-ppc@nongnu.org (open list:PowerPC TCG CPUs)
 Cc: qemu-riscv@nongnu.org (open list:RISC-V TCG CPUs)
 Cc: qemu-s390x@nongnu.org (open list:S390 TCG CPUs)
 ---
  include/hw/core/cpu.h   |  3 +++
  target/alpha/cpu.c      |  9 +++++++++
  target/arm/cpu.c        | 13 +++++++++++++
  target/avr/cpu.c        |  8 ++++++++
  target/cris/cpu.c       |  8 ++++++++
  target/hexagon/cpu.c    |  8 ++++++++
  target/hppa/cpu.c       |  8 ++++++++
  target/i386/cpu.c       |  9 +++++++++
  target/loongarch/cpu.c  |  9 +++++++++
  target/m68k/cpu.c       |  8 ++++++++
  target/microblaze/cpu.c |  8 ++++++++
  target/mips/cpu.c       |  8 ++++++++
  target/nios2/cpu.c      |  9 +++++++++
  target/openrisc/cpu.c   |  8 ++++++++
  target/ppc/cpu_init.c   |  8 ++++++++
  target/riscv/cpu.c      | 13 +++++++++++++
  target/rx/cpu.c         |  8 ++++++++
  target/s390x/cpu.c      |  8 ++++++++
  target/sh4/cpu.c        |  8 ++++++++
  target/sparc/cpu.c      |  8 ++++++++
  target/tricore/cpu.c    |  9 +++++++++
  target/xtensa/cpu.c     |  8 ++++++++
 files changed, 186 insertions(+)
-diff --git a/linux-user/arm/target_cpu.h b/linux-user/arm/target_cpu.h
+diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
 index XXXXXXX..XXXXXXX 100644
---- a/linux-user/arm/target_cpu.h
+--- a/include/hw/core/cpu.h
-+++ b/linux-user/arm/target_cpu.h
++++ b/include/hw/core/cpu.h
-@@ -XXX,XX +XXX,XX @@ static inline unsigned long arm_max_reserved_va(CPUState *cs)
+@@ -XXX,XX +XXX,XX @@ struct SysemuCPUOps;
-     } else {
+  *       If the target behaviour here is anything other than "set
-         /*
+  *       the PC register to the value passed in" then the target must
-          * We need to be able to map the commpage.
+  *       also implement the synchronize_from_tb hook.
--         * See validate_guest_space in linux-user/elfload.c.
++ * @get_pc: Callback for getting the Program Counter register.
-+         * See init_guest_commpage in linux-user/elfload.c.
++ *       As above, with the semantics of the target architecture.
-          */
+  * @gdb_read_register: Callback for letting GDB read a register.
--        return 0xffff0000ul;
+  * @gdb_write_register: Callback for letting GDB write a register.
-+        return 0xfffffffful;
+  * @gdb_adjust_breakpoint: Callback for adjusting the address of a
@@ -XXX,XX +XXX,XX @@ struct CPUClass {
      void (*dump_state)(CPUState *cpu, FILE *, int flags);
      int64_t (*get_arch_id)(CPUState *cpu);
      void (*set_pc)(CPUState *cpu, vaddr value);
 +    vaddr (*get_pc)(CPUState *cpu);
      int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg);
      int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg);
      vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr);
 diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/alpha/cpu.c
 +++ b/target/alpha/cpu.c
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.pc = value;
  }
 +static vaddr alpha_cpu_get_pc(CPUState *cs)
 +{
 +    AlphaCPU *cpu = ALPHA_CPU(cs);
 +
 +    return cpu->env.pc;
 +}
 +
 +
  static bool alpha_cpu_has_work(CPUState *cs)
  {
      /* Here we are checking to see if the CPU should wake up from HALT.
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_class_init(ObjectClass *oc, void *data)
      cc->has_work = alpha_cpu_has_work;
      cc->dump_state = alpha_cpu_dump_state;
      cc->set_pc = alpha_cpu_set_pc;
 +    cc->get_pc = alpha_cpu_get_pc;
      cc->gdb_read_register = alpha_cpu_gdb_read_register;
      cc->gdb_write_register = alpha_cpu_gdb_write_register;
  #ifndef CONFIG_USER_ONLY
 diff --git a/target/arm/cpu.c b/target/arm/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/cpu.c
 +++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_set_pc(CPUState *cs, vaddr value)
      }
  }
- #define MAX_RESERVED_VA  arm_max_reserved_va
-diff --git a/linux-user/elfload.c b/linux-user/elfload.c
++static vaddr arm_cpu_get_pc(CPUState *cs)
-index XXXXXXX..XXXXXXX 100644
++{
---- a/linux-user/elfload.c
++    ARMCPU *cpu = ARM_CPU(cs);
-+++ b/linux-user/elfload.c
++    CPUARMState *env = &cpu->env;
-@@ -XXX,XX +XXX,XX @@ enum {
++
++    if (is_a64(env)) {
- static bool init_guest_commpage(void)
++        return env->pc;
- {
++    } else {
--    void *want = g2h_untagged(HI_COMMPAGE & -qemu_host_page_size);
++        return env->regs[15];
-+    abi_ptr commpage = HI_COMMPAGE & -qemu_host_page_size;
++    }
-+    void *want = g2h_untagged(commpage);
++}
-     void *addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE,
++
-                       MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ #ifdef CONFIG_TCG
+ void arm_cpu_synchronize_from_tb(CPUState *cs,
-@@ -XXX,XX +XXX,XX @@ static bool init_guest_commpage(void)
+                                  const TranslationBlock *tb)
-         perror("Protecting guest commpage");
+@@ -XXX,XX +XXX,XX @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
-         exit(EXIT_FAILURE);
+     cc->has_work = arm_cpu_has_work;
      cc->dump_state = arm_cpu_dump_state;
      cc->set_pc = arm_cpu_set_pc;
 +    cc->get_pc = arm_cpu_get_pc;
      cc->gdb_read_register = arm_cpu_gdb_read_register;
      cc->gdb_write_register = arm_cpu_gdb_write_register;
  #ifndef CONFIG_USER_ONLY
 diff --git a/target/avr/cpu.c b/target/avr/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/avr/cpu.c
 +++ b/target/avr/cpu.c
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.pc_w = value / 2; /* internally PC points to words */
  }
 +static vaddr avr_cpu_get_pc(CPUState *cs)
 +{
 +    AVRCPU *cpu = AVR_CPU(cs);
 +
 +    return cpu->env.pc_w * 2;
 +}
 +
  static bool avr_cpu_has_work(CPUState *cs)
  {
      AVRCPU *cpu = AVR_CPU(cs);
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_class_init(ObjectClass *oc, void *data)
      cc->has_work = avr_cpu_has_work;
      cc->dump_state = avr_cpu_dump_state;
      cc->set_pc = avr_cpu_set_pc;
 +    cc->get_pc = avr_cpu_get_pc;
      dc->vmsd = &vms_avr_cpu;
      cc->sysemu_ops = &avr_sysemu_ops;
      cc->disas_set_info = avr_cpu_disas_set_info;
 diff --git a/target/cris/cpu.c b/target/cris/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/cris/cpu.c
 +++ b/target/cris/cpu.c
@@ -XXX,XX +XXX,XX @@ static void cris_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.pc = value;
  }
 +static vaddr cris_cpu_get_pc(CPUState *cs)
 +{
 +    CRISCPU *cpu = CRIS_CPU(cs);
 +
 +    return cpu->env.pc;
 +}
 +
  static bool cris_cpu_has_work(CPUState *cs)
  {
      return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
@@ -XXX,XX +XXX,XX @@ static void cris_cpu_class_init(ObjectClass *oc, void *data)
      cc->has_work = cris_cpu_has_work;
      cc->dump_state = cris_cpu_dump_state;
      cc->set_pc = cris_cpu_set_pc;
 +    cc->get_pc = cris_cpu_get_pc;
      cc->gdb_read_register = cris_cpu_gdb_read_register;
      cc->gdb_write_register = cris_cpu_gdb_write_register;
  #ifndef CONFIG_USER_ONLY
 diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/hexagon/cpu.c
 +++ b/target/hexagon/cpu.c
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_set_pc(CPUState *cs, vaddr value)
      env->gpr[HEX_REG_PC] = value;
  }
 +static vaddr hexagon_cpu_get_pc(CPUState *cs)
 +{
 +    HexagonCPU *cpu = HEXAGON_CPU(cs);
 +    CPUHexagonState *env = &cpu->env;
 +    return env->gpr[HEX_REG_PC];
 +}
 +
  static void hexagon_cpu_synchronize_from_tb(CPUState *cs,
                                              const TranslationBlock *tb)
  {
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data)
      cc->has_work = hexagon_cpu_has_work;
      cc->dump_state = hexagon_dump_state;
      cc->set_pc = hexagon_cpu_set_pc;
 +    cc->get_pc = hexagon_cpu_get_pc;
      cc->gdb_read_register = hexagon_gdb_read_register;
      cc->gdb_write_register = hexagon_gdb_write_register;
      cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS + NUM_VREGS + NUM_QREGS;
 diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/hppa/cpu.c
 +++ b/target/hppa/cpu.c
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.iaoq_b = value + 4;
  }
 +static vaddr hppa_cpu_get_pc(CPUState *cs)
 +{
 +    HPPACPU *cpu = HPPA_CPU(cs);
 +
 +    return cpu->env.iaoq_f;
 +}
 +
  static void hppa_cpu_synchronize_from_tb(CPUState *cs,
                                           const TranslationBlock *tb)
  {
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_class_init(ObjectClass *oc, void *data)
      cc->has_work = hppa_cpu_has_work;
      cc->dump_state = hppa_cpu_dump_state;
      cc->set_pc = hppa_cpu_set_pc;
 +    cc->get_pc = hppa_cpu_get_pc;
      cc->gdb_read_register = hppa_cpu_gdb_read_register;
      cc->gdb_write_register = hppa_cpu_gdb_write_register;
  #ifndef CONFIG_USER_ONLY
 diff --git a/target/i386/cpu.c b/target/i386/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/i386/cpu.c
 +++ b/target/i386/cpu.c
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.eip = value;
  }
 +static vaddr x86_cpu_get_pc(CPUState *cs)
 +{
 +    X86CPU *cpu = X86_CPU(cs);
 +
 +    /* Match cpu_get_tb_cpu_state. */
 +    return cpu->env.eip + cpu->env.segs[R_CS].base;
 +}
 +
  int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request)
  {
      X86CPU *cpu = X86_CPU(cs);
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
      cc->has_work = x86_cpu_has_work;
      cc->dump_state = x86_cpu_dump_state;
      cc->set_pc = x86_cpu_set_pc;
 +    cc->get_pc = x86_cpu_get_pc;
      cc->gdb_read_register = x86_cpu_gdb_read_register;
      cc->gdb_write_register = x86_cpu_gdb_write_register;
      cc->get_arch_id = x86_cpu_get_arch_id;
 diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/loongarch/cpu.c
 +++ b/target/loongarch/cpu.c
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_set_pc(CPUState *cs, vaddr value)
      env->pc = value;
  }
 +static vaddr loongarch_cpu_get_pc(CPUState *cs)
 +{
 +    LoongArchCPU *cpu = LOONGARCH_CPU(cs);
 +    CPULoongArchState *env = &cpu->env;
 +
 +    return env->pc;
 +}
 +
  #ifndef CONFIG_USER_ONLY
  #include "hw/loongarch/virt.h"
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_class_init(ObjectClass *c, void *data)
      cc->has_work = loongarch_cpu_has_work;
      cc->dump_state = loongarch_cpu_dump_state;
      cc->set_pc = loongarch_cpu_set_pc;
 +    cc->get_pc = loongarch_cpu_get_pc;
  #ifndef CONFIG_USER_ONLY
      dc->vmsd = &vmstate_loongarch_cpu;
      cc->sysemu_ops = &loongarch_sysemu_ops;
 diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/m68k/cpu.c
 +++ b/target/m68k/cpu.c
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.pc = value;
  }
 +static vaddr m68k_cpu_get_pc(CPUState *cs)
 +{
 +    M68kCPU *cpu = M68K_CPU(cs);
 +
 +    return cpu->env.pc;
 +}
 +
  static bool m68k_cpu_has_work(CPUState *cs)
  {
      return cs->interrupt_request & CPU_INTERRUPT_HARD;
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_class_init(ObjectClass *c, void *data)
      cc->has_work = m68k_cpu_has_work;
      cc->dump_state = m68k_cpu_dump_state;
      cc->set_pc = m68k_cpu_set_pc;
 +    cc->get_pc = m68k_cpu_get_pc;
      cc->gdb_read_register = m68k_cpu_gdb_read_register;
      cc->gdb_write_register = m68k_cpu_gdb_write_register;
  #if defined(CONFIG_SOFTMMU)
 diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/microblaze/cpu.c
 +++ b/target/microblaze/cpu.c
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.iflags = 0;
  }
 +static vaddr mb_cpu_get_pc(CPUState *cs)
 +{
 +    MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
 +
 +    return cpu->env.pc;
 +}
 +
  static void mb_cpu_synchronize_from_tb(CPUState *cs,
                                         const TranslationBlock *tb)
  {
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_class_init(ObjectClass *oc, void *data)
      cc->dump_state = mb_cpu_dump_state;
      cc->set_pc = mb_cpu_set_pc;
 +    cc->get_pc = mb_cpu_get_pc;
      cc->gdb_read_register = mb_cpu_gdb_read_register;
      cc->gdb_write_register = mb_cpu_gdb_write_register;
 diff --git a/target/mips/cpu.c b/target/mips/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/mips/cpu.c
 +++ b/target/mips/cpu.c
@@ -XXX,XX +XXX,XX @@ static void mips_cpu_set_pc(CPUState *cs, vaddr value)
      mips_env_set_pc(&cpu->env, value);
  }
 +static vaddr mips_cpu_get_pc(CPUState *cs)
 +{
 +    MIPSCPU *cpu = MIPS_CPU(cs);
 +
 +    return cpu->env.active_tc.PC;
 +}
 +
  static bool mips_cpu_has_work(CPUState *cs)
  {
      MIPSCPU *cpu = MIPS_CPU(cs);
@@ -XXX,XX +XXX,XX @@ static void mips_cpu_class_init(ObjectClass *c, void *data)
      cc->has_work = mips_cpu_has_work;
      cc->dump_state = mips_cpu_dump_state;
      cc->set_pc = mips_cpu_set_pc;
 +    cc->get_pc = mips_cpu_get_pc;
      cc->gdb_read_register = mips_cpu_gdb_read_register;
      cc->gdb_write_register = mips_cpu_gdb_write_register;
  #ifndef CONFIG_USER_ONLY
 diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/nios2/cpu.c
 +++ b/target/nios2/cpu.c
@@ -XXX,XX +XXX,XX @@ static void nios2_cpu_set_pc(CPUState *cs, vaddr value)
      env->pc = value;
  }
 +static vaddr nios2_cpu_get_pc(CPUState *cs)
 +{
 +    Nios2CPU *cpu = NIOS2_CPU(cs);
 +    CPUNios2State *env = &cpu->env;
 +
 +    return env->pc;
 +}
 +
  static bool nios2_cpu_has_work(CPUState *cs)
  {
      return cs->interrupt_request & CPU_INTERRUPT_HARD;
@@ -XXX,XX +XXX,XX @@ static void nios2_cpu_class_init(ObjectClass *oc, void *data)
      cc->has_work = nios2_cpu_has_work;
      cc->dump_state = nios2_cpu_dump_state;
      cc->set_pc = nios2_cpu_set_pc;
 +    cc->get_pc = nios2_cpu_get_pc;
      cc->disas_set_info = nios2_cpu_disas_set_info;
  #ifndef CONFIG_USER_ONLY
      cc->sysemu_ops = &nios2_sysemu_ops;
 diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/openrisc/cpu.c
 +++ b/target/openrisc/cpu.c
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.dflag = 0;
  }
 +static vaddr openrisc_cpu_get_pc(CPUState *cs)
 +{
 +    OpenRISCCPU *cpu = OPENRISC_CPU(cs);
 +
 +    return cpu->env.pc;
 +}
 +
  static void openrisc_cpu_synchronize_from_tb(CPUState *cs,
                                               const TranslationBlock *tb)
  {
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data)
      cc->has_work = openrisc_cpu_has_work;
      cc->dump_state = openrisc_cpu_dump_state;
      cc->set_pc = openrisc_cpu_set_pc;
 +    cc->get_pc = openrisc_cpu_get_pc;
      cc->gdb_read_register = openrisc_cpu_gdb_read_register;
      cc->gdb_write_register = openrisc_cpu_gdb_write_register;
  #ifndef CONFIG_USER_ONLY
 diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/ppc/cpu_init.c
 +++ b/target/ppc/cpu_init.c
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.nip = value;
  }
 +static vaddr ppc_cpu_get_pc(CPUState *cs)
 +{
 +    PowerPCCPU *cpu = POWERPC_CPU(cs);
 +
 +    return cpu->env.nip;
 +}
 +
  static bool ppc_cpu_has_work(CPUState *cs)
  {
      PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
      cc->has_work = ppc_cpu_has_work;
      cc->dump_state = ppc_cpu_dump_state;
      cc->set_pc = ppc_cpu_set_pc;
 +    cc->get_pc = ppc_cpu_get_pc;
      cc->gdb_read_register = ppc_cpu_gdb_read_register;
      cc->gdb_write_register = ppc_cpu_gdb_write_register;
  #ifndef CONFIG_USER_ONLY
 diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/riscv/cpu.c
 +++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_set_pc(CPUState *cs, vaddr value)
      }
-+
+ }
-+    page_set_flags(commpage, commpage + qemu_host_page_size,
-+                   PAGE_READ | PAGE_EXEC | PAGE_VALID);
++static vaddr riscv_cpu_get_pc(CPUState *cs)
-     return true;
++{
- }
++    RISCVCPU *cpu = RISCV_CPU(cs);
++    CPURISCVState *env = &cpu->env;
 +
 +    /* Match cpu_get_tb_cpu_state. */
 +    if (env->xl == MXL_RV32) {
 +        return env->pc & UINT32_MAX;
 +    }
 +    return env->pc;
 +}
 +
  static void riscv_cpu_synchronize_from_tb(CPUState *cs,
                                            const TranslationBlock *tb)
  {
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_class_init(ObjectClass *c, void *data)
      cc->has_work = riscv_cpu_has_work;
      cc->dump_state = riscv_cpu_dump_state;
      cc->set_pc = riscv_cpu_set_pc;
 +    cc->get_pc = riscv_cpu_get_pc;
      cc->gdb_read_register = riscv_cpu_gdb_read_register;
      cc->gdb_write_register = riscv_cpu_gdb_write_register;
      cc->gdb_num_core_regs = 33;
 diff --git a/target/rx/cpu.c b/target/rx/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/rx/cpu.c
 +++ b/target/rx/cpu.c
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.pc = value;
  }
 +static vaddr rx_cpu_get_pc(CPUState *cs)
 +{
 +    RXCPU *cpu = RX_CPU(cs);
 +
 +    return cpu->env.pc;
 +}
 +
  static void rx_cpu_synchronize_from_tb(CPUState *cs,
                                         const TranslationBlock *tb)
  {
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_class_init(ObjectClass *klass, void *data)
      cc->has_work = rx_cpu_has_work;
      cc->dump_state = rx_cpu_dump_state;
      cc->set_pc = rx_cpu_set_pc;
 +    cc->get_pc = rx_cpu_get_pc;
  #ifndef CONFIG_USER_ONLY
      cc->sysemu_ops = &rx_sysemu_ops;
 diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/s390x/cpu.c
 +++ b/target/s390x/cpu.c
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.psw.addr = value;
  }
 +static vaddr s390_cpu_get_pc(CPUState *cs)
 +{
 +    S390CPU *cpu = S390_CPU(cs);
 +
 +    return cpu->env.psw.addr;
 +}
 +
  static bool s390_cpu_has_work(CPUState *cs)
  {
      S390CPU *cpu = S390_CPU(cs);
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_class_init(ObjectClass *oc, void *data)
      cc->has_work = s390_cpu_has_work;
      cc->dump_state = s390_cpu_dump_state;
      cc->set_pc = s390_cpu_set_pc;
 +    cc->get_pc = s390_cpu_get_pc;
      cc->gdb_read_register = s390_cpu_gdb_read_register;
      cc->gdb_write_register = s390_cpu_gdb_write_register;
  #ifndef CONFIG_USER_ONLY
 diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/sh4/cpu.c
 +++ b/target/sh4/cpu.c
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.pc = value;
  }
 +static vaddr superh_cpu_get_pc(CPUState *cs)
 +{
 +    SuperHCPU *cpu = SUPERH_CPU(cs);
 +
 +    return cpu->env.pc;
 +}
 +
  static void superh_cpu_synchronize_from_tb(CPUState *cs,
                                             const TranslationBlock *tb)
  {
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_class_init(ObjectClass *oc, void *data)
      cc->has_work = superh_cpu_has_work;
      cc->dump_state = superh_cpu_dump_state;
      cc->set_pc = superh_cpu_set_pc;
 +    cc->get_pc = superh_cpu_get_pc;
      cc->gdb_read_register = superh_cpu_gdb_read_register;
      cc->gdb_write_register = superh_cpu_gdb_write_register;
  #ifndef CONFIG_USER_ONLY
 diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/sparc/cpu.c
 +++ b/target/sparc/cpu.c
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.npc = value + 4;
  }
 +static vaddr sparc_cpu_get_pc(CPUState *cs)
 +{
 +    SPARCCPU *cpu = SPARC_CPU(cs);
 +
 +    return cpu->env.pc;
 +}
 +
  static void sparc_cpu_synchronize_from_tb(CPUState *cs,
                                            const TranslationBlock *tb)
  {
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_class_init(ObjectClass *oc, void *data)
      cc->memory_rw_debug = sparc_cpu_memory_rw_debug;
  #endif
      cc->set_pc = sparc_cpu_set_pc;
 +    cc->get_pc = sparc_cpu_get_pc;
      cc->gdb_read_register = sparc_cpu_gdb_read_register;
      cc->gdb_write_register = sparc_cpu_gdb_write_register;
  #ifndef CONFIG_USER_ONLY
 diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/tricore/cpu.c
 +++ b/target/tricore/cpu.c
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_set_pc(CPUState *cs, vaddr value)
      env->PC = value & ~(target_ulong)1;
  }
 +static vaddr tricore_cpu_get_pc(CPUState *cs)
 +{
 +    TriCoreCPU *cpu = TRICORE_CPU(cs);
 +    CPUTriCoreState *env = &cpu->env;
 +
 +    return env->PC;
 +}
 +
  static void tricore_cpu_synchronize_from_tb(CPUState *cs,
                                              const TranslationBlock *tb)
  {
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_class_init(ObjectClass *c, void *data)
      cc->dump_state = tricore_cpu_dump_state;
      cc->set_pc = tricore_cpu_set_pc;
 +    cc->get_pc = tricore_cpu_get_pc;
      cc->sysemu_ops = &tricore_sysemu_ops;
      cc->tcg_ops = &tricore_tcg_ops;
  }
 diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/xtensa/cpu.c
 +++ b/target/xtensa/cpu.c
@@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_set_pc(CPUState *cs, vaddr value)
      cpu->env.pc = value;
  }
 +static vaddr xtensa_cpu_get_pc(CPUState *cs)
 +{
 +    XtensaCPU *cpu = XTENSA_CPU(cs);
 +
 +    return cpu->env.pc;
 +}
 +
  static bool xtensa_cpu_has_work(CPUState *cs)
  {
  #ifndef CONFIG_USER_ONLY
@@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_class_init(ObjectClass *oc, void *data)
      cc->has_work = xtensa_cpu_has_work;
      cc->dump_state = xtensa_cpu_dump_state;
      cc->set_pc = xtensa_cpu_set_pc;
 +    cc->get_pc = xtensa_cpu_get_pc;
      cc->gdb_read_register = xtensa_cpu_gdb_read_register;
      cc->gdb_write_register = xtensa_cpu_gdb_write_register;
      cc->gdb_stop_before_watchpoint = true;
 --
 .34.1

-[PULL 13/20] accel/tcg: Document the faulting lookup in tb_lookup_cmp
+[PULL 17/20] accel/tcg: Introduce tb_pc and log_pc
-It was non-obvious to me why we can raise an exception in
+The availability of tb->pc will shortly be conditional.
-the middle of a comparison function, but it works.
+Introduce accessor functions to minimize ifdefs.
 While nearby, use TARGET_PAGE_ALIGN instead of open-coding.
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Pass around a known pc to places like tcg_gen_code,
 where the caller must already have the value.
 Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- accel/tcg/cpu-exec.c | 11 ++++++++++-
+ accel/tcg/internal.h                    |  6 ++++
-file changed, 10 insertions(+), 1 deletion(-)
+ include/exec/exec-all.h                 |  6 ++++
  include/tcg/tcg.h                       |  2 +-
  accel/tcg/cpu-exec.c                    | 46 ++++++++++++++-----------
  accel/tcg/translate-all.c               | 37 +++++++++++---------
  target/arm/cpu.c                        |  4 +--
  target/avr/cpu.c                        |  2 +-
  target/hexagon/cpu.c                    |  2 +-
  target/hppa/cpu.c                       |  4 +--
  target/i386/tcg/tcg-cpu.c               |  2 +-
  target/loongarch/cpu.c                  |  2 +-
  target/microblaze/cpu.c                 |  2 +-
  target/mips/tcg/exception.c             |  2 +-
  target/mips/tcg/sysemu/special_helper.c |  2 +-
  target/openrisc/cpu.c                   |  2 +-
  target/riscv/cpu.c                      |  4 +--
  target/rx/cpu.c                         |  2 +-
  target/sh4/cpu.c                        |  4 +--
  target/sparc/cpu.c                      |  2 +-
  target/tricore/cpu.c                    |  2 +-
  tcg/tcg.c                               |  8 ++---
 files changed, 82 insertions(+), 61 deletions(-)
+diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
+index XXXXXXX..XXXXXXX 100644
+--- a/accel/tcg/internal.h
++++ b/accel/tcg/internal.h
+@@ -XXX,XX +XXX,XX @@ G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
+ void page_init(void);
+ void tb_htable_init(void);
++/* Return the current PC from CPU, which may be cached in TB. */
++static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
++{
++    return tb_pc(tb);
++}
++
+ #endif /* ACCEL_TCG_INTERNAL_H */
+diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
+index XXXXXXX..XXXXXXX 100644
+--- a/include/exec/exec-all.h
++++ b/include/exec/exec-all.h
+@@ -XXX,XX +XXX,XX @@ struct TranslationBlock {
+     uintptr_t jmp_dest[2];
+ };
++/* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */
++static inline target_ulong tb_pc(const TranslationBlock *tb)
++{
++    return tb->pc;
++}
++
+ /* Hide the qatomic_read to make code a little easier on the eyes */
+ static inline uint32_t tb_cflags(const TranslationBlock *tb)
+ {
+diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
+index XXXXXXX..XXXXXXX 100644
+--- a/include/tcg/tcg.h
++++ b/include/tcg/tcg.h
+@@ -XXX,XX +XXX,XX @@ void tcg_register_thread(void);
+ void tcg_prologue_init(TCGContext *s);
+ void tcg_func_start(TCGContext *s);
+-int tcg_gen_code(TCGContext *s, TranslationBlock *tb);
++int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start);
+ void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size);
 diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/cpu-exec.c
 +++ b/accel/tcg/cpu-exec.c
 @@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
-             tb_page_addr_t phys_page2;
+     const TranslationBlock *tb = p;
-             target_ulong virt_page2;
+     const struct tb_desc *desc = d;
--            virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
+-    if (tb->pc == desc->pc &&
-+            /*
++    if (tb_pc(tb) == desc->pc &&
-+             * We know that the first page matched, and an otherwise valid TB
+         tb->page_addr[0] == desc->page_addr0 &&
-+             * encountered an incomplete instruction at the end of that page,
+         tb->cs_base == desc->cs_base &&
-+             * therefore we know that generating a new TB from the current PC
+         tb->flags == desc->flags &&
-+             * must also require reading from the next page -- even if the
+@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
-+             * second pages do not match, and therefore the resulting insn
+     return tb;
-+             * is different for the new TB.  Therefore any exception raised
+ }
-+             * here by the faulting lookup is not premature.
-+             */
+-static inline void log_cpu_exec(target_ulong pc, CPUState *cpu,
-+            virt_page2 = TARGET_PAGE_ALIGN(desc->pc);
+-                                const TranslationBlock *tb)
-             phys_page2 = get_page_addr_code(desc->env, virt_page2);
++static void log_cpu_exec(target_ulong pc, CPUState *cpu,
-             if (tb->page_addr[1] == phys_page2) {
++                         const TranslationBlock *tb)
-                 return true;
+ {
 -    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC))
 -        && qemu_log_in_addr_range(pc)) {
 -
 +    if (qemu_log_in_addr_range(pc)) {
          qemu_log_mask(CPU_LOG_EXEC,
                        "Trace %d: %p [" TARGET_FMT_lx
                        "/" TARGET_FMT_lx "/%08x/%08x] %s\n",
@@ -XXX,XX +XXX,XX @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
          return tcg_code_gen_epilogue;
      }
 -    log_cpu_exec(pc, cpu, tb);
 +    if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
 +        log_cpu_exec(pc, cpu, tb);
 +    }
      return tb->tc.ptr;
  }
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
      TranslationBlock *last_tb;
      const void *tb_ptr = itb->tc.ptr;
 -    log_cpu_exec(itb->pc, cpu, itb);
 +    if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
 +        log_cpu_exec(log_pc(cpu, itb), cpu, itb);
 +    }
      qemu_thread_jit_execute();
      ret = tcg_qemu_tb_exec(env, tb_ptr);
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
           * of the start of the TB.
           */
          CPUClass *cc = CPU_GET_CLASS(cpu);
 -        qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc,
 -                               "Stopped execution of TB chain before %p ["
 -                               TARGET_FMT_lx "] %s\n",
 -                               last_tb->tc.ptr, last_tb->pc,
 -                               lookup_symbol(last_tb->pc));
 +
          if (cc->tcg_ops->synchronize_from_tb) {
              cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
          } else {
              assert(cc->set_pc);
 -            cc->set_pc(cpu, last_tb->pc);
 +            cc->set_pc(cpu, tb_pc(last_tb));
 +        }
 +        if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
 +            target_ulong pc = log_pc(cpu, last_tb);
 +            if (qemu_log_in_addr_range(pc)) {
 +                qemu_log("Stopped execution of TB chain before %p ["
 +                         TARGET_FMT_lx "] %s\n",
 +                         last_tb->tc.ptr, pc, lookup_symbol(pc));
 +            }
          }
      }
@@ -XXX,XX +XXX,XX @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
      qemu_spin_unlock(&tb_next->jmp_lock);
 -    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
 -                           "Linking TBs %p [" TARGET_FMT_lx
 -                           "] index %d -> %p [" TARGET_FMT_lx "]\n",
 -                           tb->tc.ptr, tb->pc, n,
 -                           tb_next->tc.ptr, tb_next->pc);
 +    qemu_log_mask(CPU_LOG_EXEC, "Linking TBs %p index %d -> %p\n",
 +                  tb->tc.ptr, n, tb_next->tc.ptr);
      return;
   out_unlock_next:
@@ -XXX,XX +XXX,XX @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
  }
  static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
 +                                    target_ulong pc,
                                      TranslationBlock **last_tb, int *tb_exit)
  {
      int32_t insns_left;
 -    trace_exec_tb(tb, tb->pc);
 +    trace_exec_tb(tb, pc);
      tb = cpu_tb_exec(cpu, tb, tb_exit);
      if (*tb_exit != TB_EXIT_REQUESTED) {
          *last_tb = tb;
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
                  tb_add_jump(last_tb, tb_exit, tb);
              }
 -            cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit);
 +            cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit);
              /* Try to align the host and virtual clocks
                 if the guest is in advance */
 diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/translate-all.c
 +++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
          for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
              if (i == 0) {
 -                prev = (j == 0 ? tb->pc : 0);
 +                prev = (j == 0 ? tb_pc(tb) : 0);
              } else {
                  prev = tcg_ctx->gen_insn_data[i - 1][j];
              }
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
  static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                       uintptr_t searched_pc, bool reset_icount)
  {
 -    target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
 +    target_ulong data[TARGET_INSN_START_WORDS] = { tb_pc(tb) };
      uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
      CPUArchState *env = cpu->env_ptr;
      const uint8_t *p = tb->tc.ptr + tb->tc.size;
@@ -XXX,XX +XXX,XX @@ static bool tb_cmp(const void *ap, const void *bp)
      const TranslationBlock *a = ap;
      const TranslationBlock *b = bp;
 -    return a->pc == b->pc &&
 +    return tb_pc(a) == tb_pc(b) &&
          a->cs_base == b->cs_base &&
          a->flags == b->flags &&
          (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
@@ -XXX,XX +XXX,XX @@ static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
      TranslationBlock *tb = p;
      target_ulong addr = *(target_ulong *)userp;
 -    if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
 +    if (!(addr + TARGET_PAGE_SIZE <= tb_pc(tb) ||
 +          addr >= tb_pc(tb) + tb->size)) {
          printf("ERROR invalidate: address=" TARGET_FMT_lx
 -               " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
 +               " PC=%08lx size=%04x\n", addr, (long)tb_pc(tb), tb->size);
      }
  }
@@ -XXX,XX +XXX,XX @@ static void do_tb_page_check(void *p, uint32_t hash, void *userp)
      TranslationBlock *tb = p;
      int flags1, flags2;
 -    flags1 = page_get_flags(tb->pc);
 -    flags2 = page_get_flags(tb->pc + tb->size - 1);
 +    flags1 = page_get_flags(tb_pc(tb));
 +    flags2 = page_get_flags(tb_pc(tb) + tb->size - 1);
      if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
          printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
 -               (long)tb->pc, tb->size, flags1, flags2);
 +               (long)tb_pc(tb), tb->size, flags1, flags2);
      }
  }
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
      /* remove the TB from the hash list */
      phys_pc = tb->page_addr[0];
 -    h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
 +    h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, orig_cflags,
                       tb->trace_vcpu_dstate);
      if (!qht_remove(&tb_ctx.htable, tb, h)) {
          return;
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
      }
      /* add in the hash table */
 -    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
 +    h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, tb->cflags,
                       tb->trace_vcpu_dstate);
      qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
      tcg_ctx->cpu = NULL;
      max_insns = tb->icount;
 -    trace_translate_block(tb, tb->pc, tb->tc.ptr);
 +    trace_translate_block(tb, pc, tb->tc.ptr);
      /* generate machine code */
      tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
      ti = profile_getclock();
  #endif
 -    gen_code_size = tcg_gen_code(tcg_ctx, tb);
 +    gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
      if (unlikely(gen_code_size < 0)) {
   error_return:
          switch (gen_code_size) {
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
  #ifdef DEBUG_DISAS
      if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
 -        qemu_log_in_addr_range(tb->pc)) {
 +        qemu_log_in_addr_range(pc)) {
          FILE *logfile = qemu_log_trylock();
          if (logfile) {
              int code_size, data_size;
@@ -XXX,XX +XXX,XX @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
       */
      cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
 -    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
 -                           "cpu_io_recompile: rewound execution of TB to "
 -                           TARGET_FMT_lx "\n", tb->pc);
 +    if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
 +        target_ulong pc = log_pc(cpu, tb);
 +        if (qemu_log_in_addr_range(pc)) {
 +            qemu_log("cpu_io_recompile: rewound execution of TB to "
 +                     TARGET_FMT_lx "\n", pc);
 +        }
 +    }
      cpu_loop_exit_noexc(cpu);
  }
 diff --git a/target/arm/cpu.c b/target/arm/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/arm/cpu.c
 +++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ void arm_cpu_synchronize_from_tb(CPUState *cs,
       * never possible for an AArch64 TB to chain to an AArch32 TB.
       */
      if (is_a64(env)) {
 -        env->pc = tb->pc;
 +        env->pc = tb_pc(tb);
      } else {
 -        env->regs[15] = tb->pc;
 +        env->regs[15] = tb_pc(tb);
      }
  }
  #endif /* CONFIG_TCG */
 diff --git a/target/avr/cpu.c b/target/avr/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/avr/cpu.c
 +++ b/target/avr/cpu.c
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_synchronize_from_tb(CPUState *cs,
      AVRCPU *cpu = AVR_CPU(cs);
      CPUAVRState *env = &cpu->env;
 -    env->pc_w = tb->pc / 2; /* internally PC points to words */
 +    env->pc_w = tb_pc(tb) / 2; /* internally PC points to words */
  }
  static void avr_cpu_reset(DeviceState *ds)
 diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/hexagon/cpu.c
 +++ b/target/hexagon/cpu.c
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_synchronize_from_tb(CPUState *cs,
  {
      HexagonCPU *cpu = HEXAGON_CPU(cs);
      CPUHexagonState *env = &cpu->env;
 -    env->gpr[HEX_REG_PC] = tb->pc;
 +    env->gpr[HEX_REG_PC] = tb_pc(tb);
  }
  static bool hexagon_cpu_has_work(CPUState *cs)
 diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/hppa/cpu.c
 +++ b/target/hppa/cpu.c
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs,
      HPPACPU *cpu = HPPA_CPU(cs);
  #ifdef CONFIG_USER_ONLY
 -    cpu->env.iaoq_f = tb->pc;
 +    cpu->env.iaoq_f = tb_pc(tb);
      cpu->env.iaoq_b = tb->cs_base;
  #else
      /* Recover the IAOQ values from the GVA + PRIV.  */
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs,
      int32_t diff = cs_base;
      cpu->env.iasq_f = iasq_f;
 -    cpu->env.iaoq_f = (tb->pc & ~iasq_f) + priv;
 +    cpu->env.iaoq_f = (tb_pc(tb) & ~iasq_f) + priv;
      if (diff) {
          cpu->env.iaoq_b = cpu->env.iaoq_f + diff;
      }
 diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/i386/tcg/tcg-cpu.c
 +++ b/target/i386/tcg/tcg-cpu.c
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_synchronize_from_tb(CPUState *cs,
  {
      X86CPU *cpu = X86_CPU(cs);
 -    cpu->env.eip = tb->pc - tb->cs_base;
 +    cpu->env.eip = tb_pc(tb) - tb->cs_base;
  }
  #ifndef CONFIG_USER_ONLY
 diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/loongarch/cpu.c
 +++ b/target/loongarch/cpu.c
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_synchronize_from_tb(CPUState *cs,
      LoongArchCPU *cpu = LOONGARCH_CPU(cs);
      CPULoongArchState *env = &cpu->env;
 -    env->pc = tb->pc;
 +    env->pc = tb_pc(tb);
  }
  #endif /* CONFIG_TCG */
 diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/microblaze/cpu.c
 +++ b/target/microblaze/cpu.c
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_synchronize_from_tb(CPUState *cs,
  {
      MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
 -    cpu->env.pc = tb->pc;
 +    cpu->env.pc = tb_pc(tb);
      cpu->env.iflags = tb->flags & IFLAGS_TB_MASK;
  }
 diff --git a/target/mips/tcg/exception.c b/target/mips/tcg/exception.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/mips/tcg/exception.c
 +++ b/target/mips/tcg/exception.c
@@ -XXX,XX +XXX,XX @@ void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb)
      MIPSCPU *cpu = MIPS_CPU(cs);
      CPUMIPSState *env = &cpu->env;
 -    env->active_tc.PC = tb->pc;
 +    env->active_tc.PC = tb_pc(tb);
      env->hflags &= ~MIPS_HFLAG_BMASK;
      env->hflags |= tb->flags & MIPS_HFLAG_BMASK;
  }
 diff --git a/target/mips/tcg/sysemu/special_helper.c b/target/mips/tcg/sysemu/special_helper.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/mips/tcg/sysemu/special_helper.c
 +++ b/target/mips/tcg/sysemu/special_helper.c
@@ -XXX,XX +XXX,XX @@ bool mips_io_recompile_replay_branch(CPUState *cs, const TranslationBlock *tb)
      CPUMIPSState *env = &cpu->env;
      if ((env->hflags & MIPS_HFLAG_BMASK) != 0
 -        && env->active_tc.PC != tb->pc) {
 +        && env->active_tc.PC != tb_pc(tb)) {
          env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
          env->hflags &= ~MIPS_HFLAG_BMASK;
          return true;
 diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/openrisc/cpu.c
 +++ b/target/openrisc/cpu.c
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_synchronize_from_tb(CPUState *cs,
  {
      OpenRISCCPU *cpu = OPENRISC_CPU(cs);
 -    cpu->env.pc = tb->pc;
 +    cpu->env.pc = tb_pc(tb);
  }
 diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/riscv/cpu.c
 +++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_synchronize_from_tb(CPUState *cs,
      RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
      if (xl == MXL_RV32) {
 -        env->pc = (int32_t)tb->pc;
 +        env->pc = (int32_t)tb_pc(tb);
      } else {
 -        env->pc = tb->pc;
 +        env->pc = tb_pc(tb);
      }
  }
 diff --git a/target/rx/cpu.c b/target/rx/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/rx/cpu.c
 +++ b/target/rx/cpu.c
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_synchronize_from_tb(CPUState *cs,
  {
      RXCPU *cpu = RX_CPU(cs);
 -    cpu->env.pc = tb->pc;
 +    cpu->env.pc = tb_pc(tb);
  }
  static bool rx_cpu_has_work(CPUState *cs)
 diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/sh4/cpu.c
 +++ b/target/sh4/cpu.c
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
  {
      SuperHCPU *cpu = SUPERH_CPU(cs);
 -    cpu->env.pc = tb->pc;
 +    cpu->env.pc = tb_pc(tb);
      cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
  }
@@ -XXX,XX +XXX,XX @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
      CPUSH4State *env = &cpu->env;
      if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
 -        && env->pc != tb->pc) {
 +        && env->pc != tb_pc(tb)) {
          env->pc -= 2;
          env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
          return true;
 diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/sparc/cpu.c
 +++ b/target/sparc/cpu.c
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_synchronize_from_tb(CPUState *cs,
  {
      SPARCCPU *cpu = SPARC_CPU(cs);
 -    cpu->env.pc = tb->pc;
 +    cpu->env.pc = tb_pc(tb);
      cpu->env.npc = tb->cs_base;
  }
 diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/tricore/cpu.c
 +++ b/target/tricore/cpu.c
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_synchronize_from_tb(CPUState *cs,
      TriCoreCPU *cpu = TRICORE_CPU(cs);
      CPUTriCoreState *env = &cpu->env;
 -    env->PC = tb->pc;
 +    env->PC = tb_pc(tb);
  }
  static void tricore_cpu_reset(DeviceState *dev)
 diff --git a/tcg/tcg.c b/tcg/tcg.c
 index XXXXXXX..XXXXXXX 100644
 --- a/tcg/tcg.c
 +++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void)
  #endif
 -int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 +int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
  {
  #ifdef CONFIG_PROFILER
      TCGProfile *prof = &s->prof;
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
  #ifdef DEBUG_DISAS
      if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
 -                 && qemu_log_in_addr_range(tb->pc))) {
 +                 && qemu_log_in_addr_range(pc_start))) {
          FILE *logfile = qemu_log_trylock();
          if (logfile) {
              fprintf(logfile, "OP:\n");
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
      if (s->nb_indirects > 0) {
  #ifdef DEBUG_DISAS
          if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
 -                     && qemu_log_in_addr_range(tb->pc))) {
 +                     && qemu_log_in_addr_range(pc_start))) {
              FILE *logfile = qemu_log_trylock();
              if (logfile) {
                  fprintf(logfile, "OP before indirect lowering:\n");
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
  #ifdef DEBUG_DISAS
      if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
 -                 && qemu_log_in_addr_range(tb->pc))) {
 +                 && qemu_log_in_addr_range(pc_start))) {
          FILE *logfile = qemu_log_trylock();
          if (logfile) {
              fprintf(logfile, "OP after optimization and liveness analysis:\n");
 --
 .34.1

-[PULL 10/20] accel/tcg: Make tb_htable_lookup static
+[PULL 18/20] accel/tcg: Introduce TARGET_TB_PCREL
-The function is not used outside of cpu-exec.c.  Move it and
+Prepare for targets to be able to produce TBs that can
-its subroutines up in the file, before the first use.
+run in more than one virtual context.
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
 Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
 Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- include/exec/exec-all.h |   3 -
+ accel/tcg/internal.h      |  4 +++
- accel/tcg/cpu-exec.c    | 122 ++++++++++++++++++++--------------------
+ accel/tcg/tb-jmp-cache.h  | 41 +++++++++++++++++++++++++
-files changed, 61 insertions(+), 64 deletions(-)
+ include/exec/cpu-defs.h   |  3 ++
  include/exec/exec-all.h   | 32 ++++++++++++++++++--
  accel/tcg/cpu-exec.c      | 16 ++++++----
  accel/tcg/translate-all.c | 64 ++++++++++++++++++++++++++-------------
 files changed, 131 insertions(+), 29 deletions(-)
+diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
+index XXXXXXX..XXXXXXX 100644
+--- a/accel/tcg/internal.h
++++ b/accel/tcg/internal.h
+@@ -XXX,XX +XXX,XX @@ void tb_htable_init(void);
+ /* Return the current PC from CPU, which may be cached in TB. */
+ static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
+ {
++#if TARGET_TB_PCREL
++    return cpu->cc->get_pc(cpu);
++#else
+     return tb_pc(tb);
++#endif
+ }
+ #endif /* ACCEL_TCG_INTERNAL_H */
+diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
+index XXXXXXX..XXXXXXX 100644
+--- a/accel/tcg/tb-jmp-cache.h
++++ b/accel/tcg/tb-jmp-cache.h
+@@ -XXX,XX +XXX,XX @@
+ /*
+  * Accessed in parallel; all accesses to 'tb' must be atomic.
++ * For TARGET_TB_PCREL, accesses to 'pc' must be protected by
++ * a load_acquire/store_release to 'tb'.
+  */
+ struct CPUJumpCache {
+     struct {
+         TranslationBlock *tb;
++#if TARGET_TB_PCREL
++        target_ulong pc;
++#endif
+     } array[TB_JMP_CACHE_SIZE];
+ };
++static inline TranslationBlock *
++tb_jmp_cache_get_tb(CPUJumpCache *jc, uint32_t hash)
++{
++#if TARGET_TB_PCREL
++    /* Use acquire to ensure current load of pc from jc. */
++    return qatomic_load_acquire(&jc->array[hash].tb);
++#else
++    /* Use rcu_read to ensure current load of pc from *tb. */
++    return qatomic_rcu_read(&jc->array[hash].tb);
++#endif
++}
++
++static inline target_ulong
++tb_jmp_cache_get_pc(CPUJumpCache *jc, uint32_t hash, TranslationBlock *tb)
++{
++#if TARGET_TB_PCREL
++    return jc->array[hash].pc;
++#else
++    return tb_pc(tb);
++#endif
++}
++
++static inline void
++tb_jmp_cache_set(CPUJumpCache *jc, uint32_t hash,
++                 TranslationBlock *tb, target_ulong pc)
++{
++#if TARGET_TB_PCREL
++    jc->array[hash].pc = pc;
++    /* Use store_release on tb to ensure pc is written first. */
++    qatomic_store_release(&jc->array[hash].tb, tb);
++#else
++    /* Use the pc value already stored in tb->pc. */
++    qatomic_set(&jc->array[hash].tb, tb);
++#endif
++}
++
+ #endif /* ACCEL_TCG_TB_JMP_CACHE_H */
+diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
+index XXXXXXX..XXXXXXX 100644
+--- a/include/exec/cpu-defs.h
++++ b/include/exec/cpu-defs.h
+@@ -XXX,XX +XXX,XX @@
+ #  error TARGET_PAGE_BITS must be defined in cpu-param.h
+ # endif
+ #endif
++#ifndef TARGET_TB_PCREL
++# define TARGET_TB_PCREL 0
++#endif
+ #define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
 diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
 index XXXXXXX..XXXXXXX 100644
 --- a/include/exec/exec-all.h
 +++ b/include/exec/exec-all.h
-@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs);
+@@ -XXX,XX +XXX,XX @@ struct tb_tc {
- #endif
+ };
- void tb_flush(CPUState *cpu);
- void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
+ struct TranslationBlock {
--TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
+-    target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
--                                   target_ulong cs_base, uint32_t flags,
+-    target_ulong cs_base; /* CS base for this block */
--                                   uint32_t cflags);
++#if !TARGET_TB_PCREL
- void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
++    /*
++     * Guest PC corresponding to this block.  This must be the true
- /* GETPC is the true target of the return instruction that we'll execute.  */
++     * virtual address.  Therefore e.g. x86 stores EIP + CS_BASE, and
 +     * targets like Arm, MIPS, HP-PA, which reuse low bits for ISA or
 +     * privilege, must store those bits elsewhere.
 +     *
 +     * If TARGET_TB_PCREL, the opcodes for the TranslationBlock are
 +     * written such that the TB is associated only with the physical
 +     * page and may be run in any virtual address context.  In this case,
 +     * PC must always be taken from ENV in a target-specific manner.
 +     * Unwind information is taken as offsets from the page, to be
 +     * deposited into the "current" PC.
 +     */
 +    target_ulong pc;
 +#endif
 +
 +    /*
 +     * Target-specific data associated with the TranslationBlock, e.g.:
 +     * x86: the original user, the Code Segment virtual base,
 +     * arm: an extension of tb->flags,
 +     * s390x: instruction data for EXECUTE,
 +     * sparc: the next pc of the instruction queue (for delay slots).
 +     */
 +    target_ulong cs_base;
 +
      uint32_t flags; /* flags defining in which context the code was generated */
      uint32_t cflags;    /* compile flags */
@@ -XXX,XX +XXX,XX @@ struct TranslationBlock {
  /* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */
  static inline target_ulong tb_pc(const TranslationBlock *tb)
  {
 +#if TARGET_TB_PCREL
 +    qemu_build_not_reached();
 +#else
      return tb->pc;
 +#endif
  }
  /* Hide the qatomic_read to make code a little easier on the eyes */
 diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/cpu-exec.c
 +++ b/accel/tcg/cpu-exec.c
-@@ -XXX,XX +XXX,XX @@ uint32_t curr_cflags(CPUState *cpu)
+@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
-     return cflags;
+     const TranslationBlock *tb = p;
- }
+     const struct tb_desc *desc = d;
-+struct tb_desc {
+-    if (tb_pc(tb) == desc->pc &&
-+    target_ulong pc;
++    if ((TARGET_TB_PCREL || tb_pc(tb) == desc->pc) &&
-+    target_ulong cs_base;
+         tb->page_addr[0] == desc->page_addr0 &&
-+    CPUArchState *env;
+         tb->cs_base == desc->cs_base &&
-+    tb_page_addr_t phys_page1;
+         tb->flags == desc->flags &&
-+    uint32_t flags;
+@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
-+    uint32_t cflags;
+         return NULL;
-+    uint32_t trace_vcpu_dstate;
+     }
-+};
+     desc.page_addr0 = phys_pc;
-+
+-    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
-+static bool tb_lookup_cmp(const void *p, const void *d)
++    h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : pc),
 +                     flags, cflags, *cpu->trace_dstate);
      return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
  }
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
                                            uint32_t flags, uint32_t cflags)
  {
      TranslationBlock *tb;
 +    CPUJumpCache *jc;
      uint32_t hash;
      /* we should never be trying to look up an INVALID tb */
      tcg_debug_assert(!(cflags & CF_INVALID));
      hash = tb_jmp_cache_hash_func(pc);
 -    tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb);
 +    jc = cpu->tb_jmp_cache;
 +    tb = tb_jmp_cache_get_tb(jc, hash);
      if (likely(tb &&
 -               tb->pc == pc &&
 +               tb_jmp_cache_get_pc(jc, hash, tb) == pc &&
                 tb->cs_base == cs_base &&
                 tb->flags == flags &&
                 tb->trace_vcpu_dstate == *cpu->trace_dstate &&
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
      if (tb == NULL) {
          return NULL;
      }
 -    qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb);
 +    tb_jmp_cache_set(jc, hash, tb, pc);
      return tb;
  }
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
          if (cc->tcg_ops->synchronize_from_tb) {
              cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
          } else {
 +            assert(!TARGET_TB_PCREL);
              assert(cc->set_pc);
              cc->set_pc(cpu, tb_pc(last_tb));
          }
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
                   * for the fast lookup
                   */
                  h = tb_jmp_cache_hash_func(pc);
 -                qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb);
 +                tb_jmp_cache_set(cpu->tb_jmp_cache, h, tb, pc);
              }
  #ifndef CONFIG_USER_ONLY
 diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
 index XXXXXXX..XXXXXXX 100644
 --- a/accel/tcg/translate-all.c
 +++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
          for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
              if (i == 0) {
 -                prev = (j == 0 ? tb_pc(tb) : 0);
 +                prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
              } else {
                  prev = tcg_ctx->gen_insn_data[i - 1][j];
              }
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
  static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                       uintptr_t searched_pc, bool reset_icount)
  {
 -    target_ulong data[TARGET_INSN_START_WORDS] = { tb_pc(tb) };
 +    target_ulong data[TARGET_INSN_START_WORDS];
      uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
      CPUArchState *env = cpu->env_ptr;
      const uint8_t *p = tb->tc.ptr + tb->tc.size;
@@ -XXX,XX +XXX,XX @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
          return -1;
      }
 +    memset(data, 0, sizeof(data));
 +    if (!TARGET_TB_PCREL) {
 +        data[0] = tb_pc(tb);
 +    }
 +
      /* Reconstruct the stored insn data while looking for the point at
         which the end of the insn exceeds the searched_pc.  */
      for (i = 0; i < num_insns; ++i) {
@@ -XXX,XX +XXX,XX @@ static bool tb_cmp(const void *ap, const void *bp)
      const TranslationBlock *a = ap;
      const TranslationBlock *b = bp;
 -    return tb_pc(a) == tb_pc(b) &&
 -        a->cs_base == b->cs_base &&
 -        a->flags == b->flags &&
 -        (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
 -        a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
 -        a->page_addr[0] == b->page_addr[0] &&
 -        a->page_addr[1] == b->page_addr[1];
 +    return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) &&
 +            a->cs_base == b->cs_base &&
 +            a->flags == b->flags &&
 +            (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
 +            a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
 +            a->page_addr[0] == b->page_addr[0] &&
 +            a->page_addr[1] == b->page_addr[1]);
  }
  void tb_htable_init(void)
@@ -XXX,XX +XXX,XX @@ static inline void tb_jmp_unlink(TranslationBlock *dest)
      qemu_spin_unlock(&dest->jmp_lock);
  }
 +static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
 +{
-+    const TranslationBlock *tb = p;
++    CPUState *cpu;
-+    const struct tb_desc *desc = d;
++
-+
++    if (TARGET_TB_PCREL) {
-+    if (tb->pc == desc->pc &&
++        /* A TB may be at any virtual address */
-+        tb->page_addr[0] == desc->phys_page1 &&
++        CPU_FOREACH(cpu) {
-+        tb->cs_base == desc->cs_base &&
++            tcg_flush_jmp_cache(cpu);
-+        tb->flags == desc->flags &&
++        }
-+        tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
++    } else {
-+        tb_cflags(tb) == desc->cflags) {
++        uint32_t h = tb_jmp_cache_hash_func(tb_pc(tb));
-+        /* check next page if needed */
++
-+        if (tb->page_addr[1] == -1) {
++        CPU_FOREACH(cpu) {
-+            return true;
++            CPUJumpCache *jc = cpu->tb_jmp_cache;
-+        } else {
++
-+            tb_page_addr_t phys_page2;
++            if (qatomic_read(&jc->array[h].tb) == tb) {
-+            target_ulong virt_page2;
++                qatomic_set(&jc->array[h].tb, NULL);
 +
 +            virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
 +            phys_page2 = get_page_addr_code(desc->env, virt_page2);
 +            if (tb->page_addr[1] == phys_page2) {
 +                return true;
 +            }
 +        }
 +    }
-+    return false;
 +}
 +
-+static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
+ /*
-+                                          target_ulong cs_base, uint32_t flags,
+  * In user-mode, call with mmap_lock held.
-+                                          uint32_t cflags)
+  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
-+{
+@@ -XXX,XX +XXX,XX @@ static inline void tb_jmp_unlink(TranslationBlock *dest)
-+    tb_page_addr_t phys_pc;
+  */
-+    struct tb_desc desc;
+ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
-+    uint32_t h;
+ {
-+
+-    CPUState *cpu;
-+    desc.env = cpu->env_ptr;
+     PageDesc *p;
-+    desc.cs_base = cs_base;
+     uint32_t h;
-+    desc.flags = flags;
+     tb_page_addr_t phys_pc;
-+    desc.cflags = cflags;
+@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
-+    desc.trace_vcpu_dstate = *cpu->trace_dstate;
-+    desc.pc = pc;
+     /* remove the TB from the hash list */
-+    phys_pc = get_page_addr_code(desc.env, pc);
+     phys_pc = tb->page_addr[0];
-+    if (phys_pc == -1) {
+-    h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, orig_cflags,
-+        return NULL;
+-                     tb->trace_vcpu_dstate);
-+    }
++    h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
-+    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
++                     tb->flags, orig_cflags, tb->trace_vcpu_dstate);
-+    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
+     if (!qht_remove(&tb_ctx.htable, tb, h)) {
-+    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
+         return;
-+}
+     }
-+
+@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
- /* Might cause an exception, so have a longjmp destination ready */
+     }
- static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
-                                           target_ulong cs_base,
+     /* remove the TB from the hash list */
-@@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu)
+-    h = tb_jmp_cache_hash_func(tb->pc);
-     end_exclusive();
+-    CPU_FOREACH(cpu) {
- }
+-        CPUJumpCache *jc = cpu->tb_jmp_cache;
+-        if (qatomic_read(&jc->array[h].tb) == tb) {
--struct tb_desc {
+-            qatomic_set(&jc->array[h].tb, NULL);
 -    target_ulong pc;
 -    target_ulong cs_base;
 -    CPUArchState *env;
 -    tb_page_addr_t phys_page1;
 -    uint32_t flags;
 -    uint32_t cflags;
 -    uint32_t trace_vcpu_dstate;
 -};
 -
 -static bool tb_lookup_cmp(const void *p, const void *d)
 -{
 -    const TranslationBlock *tb = p;
 -    const struct tb_desc *desc = d;
 -
 -    if (tb->pc == desc->pc &&
 -        tb->page_addr[0] == desc->phys_page1 &&
 -        tb->cs_base == desc->cs_base &&
 -        tb->flags == desc->flags &&
 -        tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
 -        tb_cflags(tb) == desc->cflags) {
 -        /* check next page if needed */
 -        if (tb->page_addr[1] == -1) {
 -            return true;
 -        } else {
 -            tb_page_addr_t phys_page2;
 -            target_ulong virt_page2;
 -
 -            virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
 -            phys_page2 = get_page_addr_code(desc->env, virt_page2);
 -            if (tb->page_addr[1] == phys_page2) {
 -                return true;
 -            }
 -        }
 -    }
--    return false;
++    tb_jmp_cache_inval_tb(tb);
--}
--
+     /* suppress this TB from the two jump lists */
--TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
+     tb_remove_from_jmp_list(tb, 0);
--                                   target_ulong cs_base, uint32_t flags,
+@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
--                                   uint32_t cflags)
+     }
--{
--    tb_page_addr_t phys_pc;
+     /* add in the hash table */
--    struct tb_desc desc;
+-    h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, tb->cflags,
--    uint32_t h;
+-                     tb->trace_vcpu_dstate);
--
++    h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
--    desc.env = cpu->env_ptr;
++                     tb->flags, tb->cflags, tb->trace_vcpu_dstate);
--    desc.cs_base = cs_base;
+     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
--    desc.flags = flags;
--    desc.cflags = cflags;
+     /* remove TB from the page(s) if we couldn't insert it */
--    desc.trace_vcpu_dstate = *cpu->trace_dstate;
+@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
--    desc.pc = pc;
--    phys_pc = get_page_addr_code(desc.env, pc);
+     gen_code_buf = tcg_ctx->code_gen_ptr;
--    if (phys_pc == -1) {
+     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
--        return NULL;
++#if !TARGET_TB_PCREL
--    }
+     tb->pc = pc;
--    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
++#endif
--    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
+     tb->cs_base = cs_base;
--    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
+     tb->flags = flags;
--}
+     tb->cflags = cflags;
 -
  void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
  {
      if (TCG_TARGET_HAS_direct_jump) {
 --
 .34.1

-[PULL 17/20] target/s390x: Make translator stop before the end of a page
+[PULL 19/20] tcg/ppc: Optimize 26-bit jumps
-From: Ilya Leoshkevich <iii@linux.ibm.com>
+From: Leandro Lupori <leandro.lupori@eldorado.org.br>
-Right now translator stops right *after* the end of a page, which
+PowerPC64 processors handle direct branches better than indirect
-breaks reporting of fault locations when the last instruction of a
+ones, resulting in fewer stalled cycles and branch misses.
 multi-insn translation block crosses a page boundary.
-Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
+However, PPC's tb_target_set_jmp_target() was only using direct
 branches for 16-bit jumps, while PowerPC64's unconditional branch
 instructions are able to handle displacements of up to 26 bits.
 To take advantage of this, jumps whose displacements fit in
 17 to 26 bits are now also converted to direct branches.
 Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
-Message-Id: <20220817150506.592862-3-iii@linux.ibm.com>
+Signed-off-by: Leandro Lupori <leandro.lupori@eldorado.org.br>
 [rth: Expanded some commentary.]
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- target/s390x/tcg/translate.c     |  15 +++-
+ tcg/ppc/tcg-target.c.inc | 119 +++++++++++++++++++++++++++++----------
- tests/tcg/s390x/noexec.c         | 106 +++++++++++++++++++++++
+file changed, 88 insertions(+), 31 deletions(-)
  tests/tcg/multiarch/noexec.c.inc | 139 +++++++++++++++++++++++++++++++
  tests/tcg/s390x/Makefile.target  |   1 +
 files changed, 257 insertions(+), 4 deletions(-)
  create mode 100644 tests/tcg/s390x/noexec.c
  create mode 100644 tests/tcg/multiarch/noexec.c.inc
-diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
+diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
 index XXXXXXX..XXXXXXX 100644
---- a/target/s390x/tcg/translate.c
+--- a/tcg/ppc/tcg-target.c.inc
-+++ b/target/s390x/tcg/translate.c
++++ b/tcg/ppc/tcg-target.c.inc
-@@ -XXX,XX +XXX,XX @@ static void s390x_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
+@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
-     dc->insn_start = tcg_last_op();
+     tcg_out32(s, insn);
  }
-+static target_ulong get_next_pc(CPUS390XState *env, DisasContext *s,
++static inline uint64_t make_pair(tcg_insn_unit i1, tcg_insn_unit i2)
 +                                uint64_t pc)
 +{
-+    uint64_t insn = ld_code2(env, s, pc);
++    if (HOST_BIG_ENDIAN) {
-+
++        return (uint64_t)i1 << 32 | i2;
-+    return pc + get_ilen((insn >> 8) & 0xff);
++    }
 +    return (uint64_t)i2 << 32 | i1;
 +}
 +
- static void s390x_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
++static inline void ppc64_replace2(uintptr_t rx, uintptr_t rw,
- {
++                                  tcg_insn_unit i0, tcg_insn_unit i1)
      CPUS390XState *env = cs->env_ptr;
@@ -XXX,XX +XXX,XX @@ static void s390x_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
      dc->base.is_jmp = translate_one(env, dc);
      if (dc->base.is_jmp == DISAS_NEXT) {
 -        uint64_t page_start;
 -
 -        page_start = dc->base.pc_first & TARGET_PAGE_MASK;
 -        if (dc->base.pc_next - page_start >= TARGET_PAGE_SIZE || dc->ex_value) {
 +        if (!is_same_page(dcbase, dc->base.pc_next) ||
 +            !is_same_page(dcbase, get_next_pc(env, dc, dc->base.pc_next)) ||
 +            dc->ex_value) {
              dc->base.is_jmp = DISAS_TOO_MANY;
          }
      }
 diff --git a/tests/tcg/s390x/noexec.c b/tests/tcg/s390x/noexec.c
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/tests/tcg/s390x/noexec.c
@@ -XXX,XX +XXX,XX @@
 +#include "../multiarch/noexec.c.inc"
 +
 +static void *arch_mcontext_pc(const mcontext_t *ctx)
 +{
-+    return (void *)ctx->psw.addr;
++#if TCG_TARGET_REG_BITS == 64
 +    qatomic_set((uint64_t *)rw, make_pair(i0, i1));
 +    flush_idcache_range(rx, rw, 8);
 +#else
 +    qemu_build_not_reached();
 +#endif
 +}
 +
-+static int arch_mcontext_arg(const mcontext_t *ctx)
++static inline void ppc64_replace4(uintptr_t rx, uintptr_t rw,
 +                                  tcg_insn_unit i0, tcg_insn_unit i1,
 +                                  tcg_insn_unit i2, tcg_insn_unit i3)
 +{
-+    return ctx->gregs[2];
++    uint64_t p[2];
 +
 +    p[!HOST_BIG_ENDIAN] = make_pair(i0, i1);
 +    p[HOST_BIG_ENDIAN] = make_pair(i2, i3);
 +
 +    /*
 +     * There's no convenient way to get the compiler to allocate a pair
 +     * of registers at an even index, so copy into r6/r7 and clobber.
 +     */
 +    asm("mr  %%r6, %1\n\t"
 +        "mr  %%r7, %2\n\t"
 +        "stq %%r6, %0"
 +        : "=Q"(*(__int128 *)rw) : "r"(p[0]), "r"(p[1]) : "r6", "r7");
 +    flush_idcache_range(rx, rw, 16);
 +}
 +
-+static void arch_flush(void *p, int len)
+ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
-+{
+                               uintptr_t jmp_rw, uintptr_t addr)
-+}
+ {
 -    if (TCG_TARGET_REG_BITS == 64) {
 -        tcg_insn_unit i1, i2;
 -        intptr_t tb_diff = addr - tc_ptr;
 -        intptr_t br_diff = addr - (jmp_rx + 4);
 -        uint64_t pair;
 +    tcg_insn_unit i0, i1, i2, i3;
 +    intptr_t tb_diff = addr - tc_ptr;
 +    intptr_t br_diff = addr - (jmp_rx + 4);
 +    intptr_t lo, hi;
 -        /* This does not exercise the range of the branch, but we do
 -           still need to be able to load the new value of TCG_REG_TB.
 -           But this does still happen quite often.  */
 -        if (tb_diff == (int16_t)tb_diff) {
 -            i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
 -            i2 = B | (br_diff & 0x3fffffc);
 -        } else {
 -            intptr_t lo = (int16_t)tb_diff;
 -            intptr_t hi = (int32_t)(tb_diff - lo);
 -            assert(tb_diff == hi + lo);
 -            i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
 -            i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
 -        }
 -#if HOST_BIG_ENDIAN
 -        pair = (uint64_t)i1 << 32 | i2;
 -#else
 -        pair = (uint64_t)i2 << 32 | i1;
 -#endif
 -
 -        /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
 -           within qatomic_set that would fail to build a ppc32 host.  */
 -        qatomic_set__nocheck((uint64_t *)jmp_rw, pair);
 -        flush_idcache_range(jmp_rx, jmp_rw, 8);
 -    } else {
 +    if (TCG_TARGET_REG_BITS == 32) {
          intptr_t diff = addr - jmp_rx;
          tcg_debug_assert(in_range_b(diff));
          qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
          flush_idcache_range(jmp_rx, jmp_rw, 4);
 +        return;
      }
 +
-+extern char noexec_1[];
++    /*
-+extern char noexec_2[];
++     * For 16-bit displacements, we can use a single add + branch.
-+extern char noexec_end[];
++     * This happens quite often.
-+
++     */
-+asm("noexec_1:\n"
++    if (tb_diff == (int16_t)tb_diff) {
-+    "   lgfi %r2,1\n"       /* %r2 is 0 on entry, set 1. */
++        i0 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
-+    "noexec_2:\n"
++        i1 = B | (br_diff & 0x3fffffc);
-+    "   lgfi %r2,2\n"       /* %r2 is 0/1; set 2. */
++        ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
-+    "   br %r14\n"          /* return */
++        return;
 +    "noexec_end:");
 +
 +extern char exrl_1[];
 +extern char exrl_2[];
 +extern char exrl_end[];
 +
 +asm("exrl_1:\n"
 +    "   exrl %r0, exrl_2\n"
 +    "   br %r14\n"
 +    "exrl_2:\n"
 +    "   lgfi %r2,2\n"
 +    "exrl_end:");
 +
 +int main(void)
 +{
 +    struct noexec_test noexec_tests[] = {
 +        {
 +            .name = "fallthrough",
 +            .test_code = noexec_1,
 +            .test_len = noexec_end - noexec_1,
 +            .page_ofs = noexec_1 - noexec_2,
 +            .entry_ofs = noexec_1 - noexec_2,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = 0,
 +            .expected_arg = 1,
 +        },
 +        {
 +            .name = "jump",
 +            .test_code = noexec_1,
 +            .test_len = noexec_end - noexec_1,
 +            .page_ofs = noexec_1 - noexec_2,
 +            .entry_ofs = 0,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = 0,
 +            .expected_arg = 0,
 +        },
 +        {
 +            .name = "exrl",
 +            .test_code = exrl_1,
 +            .test_len = exrl_end - exrl_1,
 +            .page_ofs = exrl_1 - exrl_2,
 +            .entry_ofs = exrl_1 - exrl_2,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = exrl_1 - exrl_2,
 +            .expected_arg = 0,
 +        },
 +        {
 +            .name = "fallthrough [cross]",
 +            .test_code = noexec_1,
 +            .test_len = noexec_end - noexec_1,
 +            .page_ofs = noexec_1 - noexec_2 - 2,
 +            .entry_ofs = noexec_1 - noexec_2 - 2,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = -2,
 +            .expected_arg = 1,
 +        },
 +        {
 +            .name = "jump [cross]",
 +            .test_code = noexec_1,
 +            .test_len = noexec_end - noexec_1,
 +            .page_ofs = noexec_1 - noexec_2 - 2,
 +            .entry_ofs = -2,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = -2,
 +            .expected_arg = 0,
 +        },
 +        {
 +            .name = "exrl [cross]",
 +            .test_code = exrl_1,
 +            .test_len = exrl_end - exrl_1,
 +            .page_ofs = exrl_1 - exrl_2 - 2,
 +            .entry_ofs = exrl_1 - exrl_2 - 2,
 +            .expected_si_ofs = 0,
 +            .expected_pc_ofs = exrl_1 - exrl_2 - 2,
 +            .expected_arg = 0,
 +        },
 +    };
 +
 +    return test_noexec(noexec_tests,
 +                       sizeof(noexec_tests) / sizeof(noexec_tests[0]));
 +}
 diff --git a/tests/tcg/multiarch/noexec.c.inc b/tests/tcg/multiarch/noexec.c.inc
 new file mode 100644
 index XXXXXXX..XXXXXXX
 --- /dev/null
 +++ b/tests/tcg/multiarch/noexec.c.inc
@@ -XXX,XX +XXX,XX @@
 +/*
 + * Common code for arch-specific MMU_INST_FETCH fault testing.
 + */
 +
 +#define _GNU_SOURCE
 +
 +#include <assert.h>
 +#include <signal.h>
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +#include <errno.h>
 +#include <unistd.h>
 +#include <sys/mman.h>
 +#include <sys/ucontext.h>
 +
 +/* Forward declarations. */
 +
 +static void *arch_mcontext_pc(const mcontext_t *ctx);
 +static int arch_mcontext_arg(const mcontext_t *ctx);
 +static void arch_flush(void *p, int len);
 +
 +/* Testing infrastructure. */
 +
 +struct noexec_test {
 +    const char *name;
 +    const char *test_code;
 +    int test_len;
 +    int page_ofs;
 +    int entry_ofs;
 +    int expected_si_ofs;
 +    int expected_pc_ofs;
 +    int expected_arg;
 +};
 +
 +static void *page_base;
 +static int page_size;
 +static const struct noexec_test *current_noexec_test;
 +
 +static void handle_err(const char *syscall)
 +{
 +    printf("[  FAILED  ] %s: %s\n", syscall, strerror(errno));
 +    exit(EXIT_FAILURE);
 +}
 +
 +static void handle_segv(int sig, siginfo_t *info, void *ucontext)
 +{
 +    const struct noexec_test *test = current_noexec_test;
 +    const mcontext_t *mc = &((ucontext_t *)ucontext)->uc_mcontext;
 +    void *expected_si;
 +    void *expected_pc;
 +    void *pc;
 +    int arg;
 +
 +    if (test == NULL) {
 +        printf("[  FAILED  ] unexpected SEGV\n");
 +        exit(EXIT_FAILURE);
 +    }
 +    current_noexec_test = NULL;
 +
 +    expected_si = page_base + test->expected_si_ofs;
 +    if (info->si_addr != expected_si) {
 +        printf("[  FAILED  ] wrong si_addr (%p != %p)\n",
 +               info->si_addr, expected_si);
 +        exit(EXIT_FAILURE);
 +    }
 +
-+    pc = arch_mcontext_pc(mc);
++    lo = (int16_t)tb_diff;
-+    expected_pc = page_base + test->expected_pc_ofs;
++    hi = (int32_t)(tb_diff - lo);
-+    if (pc != expected_pc) {
++    assert(tb_diff == hi + lo);
-+        printf("[  FAILED  ] wrong pc (%p != %p)\n", pc, expected_pc);
++    i0 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
-+        exit(EXIT_FAILURE);
++    i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
 +
 +    /*
 +     * Without stq from 2.07, we can only update two insns,
 +     * and those must be the ones that load the target address.
 +     */
 +    if (!have_isa_2_07) {
 +        ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
 +        return;
 +    }
 +
-+    arg = arch_mcontext_arg(mc);
++    /*
-+    if (arg != test->expected_arg) {
++     * For 26-bit displacements, we can use a direct branch.
-+        printf("[  FAILED  ] wrong arg (%d != %d)\n", arg, test->expected_arg);
++     * Otherwise we still need the indirect branch, which we
-+        exit(EXIT_FAILURE);
++     * must restore after a potential direct branch write.
 +     */
 +    br_diff -= 4;
 +    if (in_range_b(br_diff)) {
 +        i2 = B | (br_diff & 0x3fffffc);
 +        i3 = NOP;
 +    } else {
 +        i2 = MTSPR | RS(TCG_REG_TB) | CTR;
 +        i3 = BCCTR | BO_ALWAYS;
 +    }
-+
++    ppc64_replace4(jmp_rx, jmp_rw, i0, i1, i2, i3);
-+    if (mprotect(page_base, page_size,
+ }
-+                 PROT_READ | PROT_WRITE | PROT_EXEC) < 0) {
-+        handle_err("mprotect");
+ static void tcg_out_call_int(TCGContext *s, int lk,
-+    }
+@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
-+}
+         if (s->tb_jmp_insn_offset) {
-+
+             /* Direct jump. */
-+static void test_noexec_1(const struct noexec_test *test)
+             if (TCG_TARGET_REG_BITS == 64) {
-+{
+-                /* Ensure the next insns are 8-byte aligned. */
-+    void *start = page_base + test->page_ofs;
+-                if ((uintptr_t)s->code_ptr & 7) {
-+    void (*fn)(int arg) = page_base + test->entry_ofs;
++                /* Ensure the next insns are 8 or 16-byte aligned. */
-+
++                while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) {
-+    memcpy(start, test->test_code, test->test_len);
+                     tcg_out32(s, NOP);
-+    arch_flush(start, test->test_len);
+                 }
-+
+                 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
 +    /* Trigger TB creation in order to test invalidation. */
 +    fn(0);
 +
 +    if (mprotect(page_base, page_size, PROT_NONE) < 0) {
 +        handle_err("mprotect");
 +    }
 +
 +    /* Trigger SEGV and check that handle_segv() ran. */
 +    current_noexec_test = test;
 +    fn(0);
 +    assert(current_noexec_test == NULL);
 +}
 +
 +static int test_noexec(struct noexec_test *tests, size_t n_tests)
 +{
 +    struct sigaction act;
 +    size_t i;
 +
 +    memset(&act, 0, sizeof(act));
 +    act.sa_sigaction = handle_segv;
 +    act.sa_flags = SA_SIGINFO;
 +    if (sigaction(SIGSEGV, &act, NULL) < 0) {
 +        handle_err("sigaction");
 +    }
 +
 +    page_size = getpagesize();
 +    page_base = mmap(NULL, 2 * page_size,
 +                     PROT_READ | PROT_WRITE | PROT_EXEC,
 +                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 +    if (page_base == MAP_FAILED) {
 +        handle_err("mmap");
 +    }
 +    page_base += page_size;
 +
 +    for (i = 0; i < n_tests; i++) {
 +        struct noexec_test *test = &tests[i];
 +
 +        printf("[ RUN      ] %s\n", test->name);
 +        test_noexec_1(test);
 +        printf("[       OK ]\n");
 +    }
 +
 +    printf("[  PASSED  ]\n");
 +    return EXIT_SUCCESS;
 +}
 diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
 index XXXXXXX..XXXXXXX 100644
 --- a/tests/tcg/s390x/Makefile.target
 +++ b/tests/tcg/s390x/Makefile.target
@@ -XXX,XX +XXX,XX @@ TESTS+=shift
  TESTS+=trap
  TESTS+=signals-s390x
  TESTS+=branch-relative-long
 +TESTS+=noexec
  Z14_TESTS=vfminmax
  vfminmax: LDFLAGS+=-lm
 --
 .34.1

-[PULL 02/20] linux-user/hppa: Allocate page zero as a commpage
+[PULL 20/20] target/sh4: Fix TB_FLAG_UNALIGN
-We're about to start validating PAGE_EXEC, which means that we've
+The value previously chosen overlaps GUSA_MASK.
 got to mark page zero executable.  We had been special casing this
 entirely within translate.
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
+Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
-Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
+that they are included in TB_FLAGS.  Add aliases for the
 FPSCR and SR bits that are included in TB_FLAGS, so that
 we don't accidentally reassign those bits.
 Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
 Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp>
 Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
 ---
- linux-user/elfload.c | 34 +++++++++++++++++++++++++++++++---
+ target/sh4/cpu.h        | 56 +++++++++++++------------
-file changed, 31 insertions(+), 3 deletions(-)
+ linux-user/sh4/signal.c |  6 +--
  target/sh4/cpu.c        |  6 +--
  target/sh4/helper.c     |  6 +--
  target/sh4/translate.c  | 90 ++++++++++++++++++++++-------------------
 files changed, 88 insertions(+), 76 deletions(-)
-diff --git a/linux-user/elfload.c b/linux-user/elfload.c
+diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
 index XXXXXXX..XXXXXXX 100644
---- a/linux-user/elfload.c
+--- a/target/sh4/cpu.h
-+++ b/linux-user/elfload.c
++++ b/target/sh4/cpu.h
-@@ -XXX,XX +XXX,XX @@ static inline void init_thread(struct target_pt_regs *regs,
+@@ -XXX,XX +XXX,XX @@
-     regs->gr[31] = infop->entry;
+ #define FPSCR_RM_NEAREST       (0 << 0)
  #define FPSCR_RM_ZERO          (1 << 0)
 -#define DELAY_SLOT_MASK        0x7
 -#define DELAY_SLOT             (1 << 0)
 -#define DELAY_SLOT_CONDITIONAL (1 << 1)
 -#define DELAY_SLOT_RTE         (1 << 2)
 +#define TB_FLAG_DELAY_SLOT       (1 << 0)
 +#define TB_FLAG_DELAY_SLOT_COND  (1 << 1)
 +#define TB_FLAG_DELAY_SLOT_RTE   (1 << 2)
 +#define TB_FLAG_PENDING_MOVCA    (1 << 3)
 +#define TB_FLAG_GUSA_SHIFT       4                      /* [11:4] */
 +#define TB_FLAG_GUSA_EXCLUSIVE   (1 << 12)
 +#define TB_FLAG_UNALIGN          (1 << 13)
 +#define TB_FLAG_SR_FD            (1 << SR_FD)           /* 15 */
 +#define TB_FLAG_FPSCR_PR         FPSCR_PR               /* 19 */
 +#define TB_FLAG_FPSCR_SZ         FPSCR_SZ               /* 20 */
 +#define TB_FLAG_FPSCR_FR         FPSCR_FR               /* 21 */
 +#define TB_FLAG_SR_RB            (1 << SR_RB)           /* 29 */
 +#define TB_FLAG_SR_MD            (1 << SR_MD)           /* 30 */
 -#define TB_FLAG_PENDING_MOVCA  (1 << 3)
 -#define TB_FLAG_UNALIGN        (1 << 4)
 -
 -#define GUSA_SHIFT             4
 -#ifdef CONFIG_USER_ONLY
 -#define GUSA_EXCLUSIVE         (1 << 12)
 -#define GUSA_MASK              ((0xff << GUSA_SHIFT) | GUSA_EXCLUSIVE)
 -#else
 -/* Provide dummy versions of the above to allow tests against tbflags
 -   to be elided while avoiding ifdefs.  */
 -#define GUSA_EXCLUSIVE         0
 -#define GUSA_MASK              0
 -#endif
 -
 -#define TB_FLAG_ENVFLAGS_MASK  (DELAY_SLOT_MASK | GUSA_MASK)
 +#define TB_FLAG_DELAY_SLOT_MASK  (TB_FLAG_DELAY_SLOT |       \
 +                                  TB_FLAG_DELAY_SLOT_COND |  \
 +                                  TB_FLAG_DELAY_SLOT_RTE)
 +#define TB_FLAG_GUSA_MASK        ((0xff << TB_FLAG_GUSA_SHIFT) | \
 +                                  TB_FLAG_GUSA_EXCLUSIVE)
 +#define TB_FLAG_FPSCR_MASK       (TB_FLAG_FPSCR_PR | \
 +                                  TB_FLAG_FPSCR_SZ | \
 +                                  TB_FLAG_FPSCR_FR)
 +#define TB_FLAG_SR_MASK          (TB_FLAG_SR_FD | \
 +                                  TB_FLAG_SR_RB | \
 +                                  TB_FLAG_SR_MD)
 +#define TB_FLAG_ENVFLAGS_MASK    (TB_FLAG_DELAY_SLOT_MASK | \
 +                                  TB_FLAG_GUSA_MASK)
  typedef struct tlb_t {
      uint32_t vpn;        /* virtual page number */
@@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
  {
      /* The instruction in a RTE delay slot is fetched in privileged
         mode, but executed in user mode.  */
 -    if (ifetch && (env->flags & DELAY_SLOT_RTE)) {
 +    if (ifetch && (env->flags & TB_FLAG_DELAY_SLOT_RTE)) {
          return 0;
      } else {
          return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
  {
      *pc = env->pc;
      /* For a gUSA region, notice the end of the region.  */
 -    *cs_base = env->flags & GUSA_MASK ? env->gregs[0] : 0;
 -    *flags = env->flags /* TB_FLAG_ENVFLAGS_MASK: bits 0-2, 4-12 */
 -            | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
 -            | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
 -            | (env->sr & (1u << SR_FD))                        /* Bit 15 */
 +    *cs_base = env->flags & TB_FLAG_GUSA_MASK ? env->gregs[0] : 0;
 +    *flags = env->flags
 +            | (env->fpscr & TB_FLAG_FPSCR_MASK)
 +            | (env->sr & TB_FLAG_SR_MASK)
              | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */
  #ifdef CONFIG_USER_ONLY
      *flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus;
 diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
 index XXXXXXX..XXXXXXX 100644
 --- a/linux-user/sh4/signal.c
 +++ b/linux-user/sh4/signal.c
@@ -XXX,XX +XXX,XX @@ static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc)
      __get_user(regs->fpul, &sc->sc_fpul);
      regs->tra = -1;         /* disable syscall checks */
 -    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
 +    regs->flags = 0;
  }
-+#define LO_COMMPAGE  0
+ void setup_frame(int sig, struct target_sigaction *ka,
-+
+@@ -XXX,XX +XXX,XX @@ void setup_frame(int sig, struct target_sigaction *ka,
-+static bool init_guest_commpage(void)
+     regs->gregs[5] = 0;
-+{
+     regs->gregs[6] = frame_addr += offsetof(typeof(*frame), sc);
-+    void *want = g2h_untagged(LO_COMMPAGE);
+     regs->pc = (unsigned long) ka->_sa_handler;
-+    void *addr = mmap(want, qemu_host_page_size, PROT_NONE,
+-    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
-+                      MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
++    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
-+
-+    if (addr == MAP_FAILED) {
+     unlock_user_struct(frame, frame_addr, 1);
-+        perror("Allocating guest commpage");
+     return;
-+        exit(EXIT_FAILURE);
+@@ -XXX,XX +XXX,XX @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
-+    }
+     regs->gregs[5] = frame_addr + offsetof(typeof(*frame), info);
-+    if (addr != want) {
+     regs->gregs[6] = frame_addr + offsetof(typeof(*frame), uc);
-+        return false;
+     regs->pc = (unsigned long) ka->_sa_handler;
-+    }
+-    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
-+
++    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
-+    /*
-+     * On Linux, page zero is normally marked execute only + gateway.
+     unlock_user_struct(frame, frame_addr, 1);
-+     * Normal read or write is supposed to fail (thus PROT_NONE above),
+     return;
-+     * but specific offsets have kernel code mapped to raise permissions
+diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
-+     * and implement syscalls.  Here, simply mark the page executable.
+index XXXXXXX..XXXXXXX 100644
-+     * Special case the entry points during translation (see do_page_zero).
+--- a/target/sh4/cpu.c
-+     */
++++ b/target/sh4/cpu.c
-+    page_set_flags(LO_COMMPAGE, LO_COMMPAGE + TARGET_PAGE_SIZE,
+@@ -XXX,XX +XXX,XX @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
-+                   PAGE_EXEC | PAGE_VALID);
+     SuperHCPU *cpu = SUPERH_CPU(cs);
-+    return true;
-+}
+     cpu->env.pc = tb_pc(tb);
-+
+-    cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
- #endif /* TARGET_HPPA */
++    cpu->env.flags = tb->flags;
  #ifdef TARGET_XTENSA
@@ -XXX,XX +XXX,XX @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
  }
- #if defined(HI_COMMPAGE)
+ #ifndef CONFIG_USER_ONLY
--#define LO_COMMPAGE 0
+@@ -XXX,XX +XXX,XX @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
-+#define LO_COMMPAGE -1
+     SuperHCPU *cpu = SUPERH_CPU(cs);
- #elif defined(LO_COMMPAGE)
+     CPUSH4State *env = &cpu->env;
- #define HI_COMMPAGE 0
- #else
+-    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
- #define HI_COMMPAGE 0
++    if ((env->flags & (TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND))
--#define LO_COMMPAGE 0
+         && env->pc != tb_pc(tb)) {
-+#define LO_COMMPAGE -1
+         env->pc -= 2;
- #define init_guest_commpage() true
+-        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
 +        env->flags &= ~(TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND);
          return true;
      }
      return false;
 diff --git a/target/sh4/helper.c b/target/sh4/helper.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/sh4/helper.c
 +++ b/target/sh4/helper.c
@@ -XXX,XX +XXX,XX @@ void superh_cpu_do_interrupt(CPUState *cs)
      env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
      env->lock_addr = -1;
 -    if (env->flags & DELAY_SLOT_MASK) {
 +    if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
          /* Branch instruction should be executed again before delay slot. */
      env->spc -= 2;
      /* Clear flags for exception/interrupt routine. */
 -        env->flags &= ~DELAY_SLOT_MASK;
 +        env->flags &= ~TB_FLAG_DELAY_SLOT_MASK;
      }
      if (do_exp) {
@@ -XXX,XX +XXX,XX @@ bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
          CPUSH4State *env = &cpu->env;
          /* Delay slots are indivisible, ignore interrupts */
 -        if (env->flags & DELAY_SLOT_MASK) {
 +        if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
              return false;
          } else {
              superh_cpu_do_interrupt(cs);
 diff --git a/target/sh4/translate.c b/target/sh4/translate.c
 index XXXXXXX..XXXXXXX 100644
 --- a/target/sh4/translate.c
 +++ b/target/sh4/translate.c
@@ -XXX,XX +XXX,XX @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, int flags)
              i, env->gregs[i], i + 1, env->gregs[i + 1],
              i + 2, env->gregs[i + 2], i + 3, env->gregs[i + 3]);
      }
 -    if (env->flags & DELAY_SLOT) {
 +    if (env->flags & TB_FLAG_DELAY_SLOT) {
          qemu_printf("in delay slot (delayed_pc=0x%08x)\n",
              env->delayed_pc);
 -    } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
 +    } else if (env->flags & TB_FLAG_DELAY_SLOT_COND) {
          qemu_printf("in conditional delay slot (delayed_pc=0x%08x)\n",
              env->delayed_pc);
 -    } else if (env->flags & DELAY_SLOT_RTE) {
 +    } else if (env->flags & TB_FLAG_DELAY_SLOT_RTE) {
          qemu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
                       env->delayed_pc);
      }
@@ -XXX,XX +XXX,XX @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
  static inline bool use_exit_tb(DisasContext *ctx)
  {
 -    return (ctx->tbflags & GUSA_EXCLUSIVE) != 0;
 +    return (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) != 0;
  }
  static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
@@ -XXX,XX +XXX,XX @@ static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
      TCGLabel *l1 = gen_new_label();
      TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE;
 -    if (ctx->tbflags & GUSA_EXCLUSIVE) {
 +    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
          /* When in an exclusive region, we must continue to the end.
             Therefore, exit the region on a taken branch, but otherwise
             fall through to the next instruction.  */
          tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
 -        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
 +        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
          /* Note that this won't actually use a goto_tb opcode because we
             disallow it in use_goto_tb, but it handles exit + singlestep.  */
          gen_goto_tb(ctx, 0, dest);
@@ -XXX,XX +XXX,XX @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
      tcg_gen_mov_i32(ds, cpu_delayed_cond);
      tcg_gen_discard_i32(cpu_delayed_cond);
 -    if (ctx->tbflags & GUSA_EXCLUSIVE) {
 +    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
          /* When in an exclusive region, we must continue to the end.
             Therefore, exit the region on a taken branch, but otherwise
             fall through to the next instruction.  */
          tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);
          /* Leave the gUSA region.  */
 -        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
 +        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
          gen_jump(ctx);
          gen_set_label(l1);
@@ -XXX,XX +XXX,XX @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
  #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
  #define CHECK_NOT_DELAY_SLOT \
 -    if (ctx->envflags & DELAY_SLOT_MASK) {  \
 -        goto do_illegal_slot;               \
 +    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {  \
 +        goto do_illegal_slot;                       \
      }
  #define CHECK_PRIVILEGED \
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
      case 0x000b:        /* rts */
      CHECK_NOT_DELAY_SLOT
      tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
 -        ctx->envflags |= DELAY_SLOT;
 +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
      ctx->delayed_pc = (uint32_t) - 1;
      return;
      case 0x0028:        /* clrmac */
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
      CHECK_NOT_DELAY_SLOT
          gen_write_sr(cpu_ssr);
      tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
 -        ctx->envflags |= DELAY_SLOT_RTE;
 +        ctx->envflags |= TB_FLAG_DELAY_SLOT_RTE;
      ctx->delayed_pc = (uint32_t) - 1;
          ctx->base.is_jmp = DISAS_STOP;
      return;
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
      return;
      case 0xe000:        /* mov #imm,Rn */
  #ifdef CONFIG_USER_ONLY
 -        /* Detect the start of a gUSA region.  If so, update envflags
 -           and end the TB.  This will allow us to see the end of the
 -           region (stored in R0) in the next TB.  */
 +        /*
 +         * Detect the start of a gUSA region (mov #-n, r15).
 +         * If so, update envflags and end the TB.  This will allow us
 +         * to see the end of the region (stored in R0) in the next TB.
 +         */
          if (B11_8 == 15 && B7_0s < 0 &&
              (tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
 -            ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
 +            ctx->envflags =
 +                deposit32(ctx->envflags, TB_FLAG_GUSA_SHIFT, 8, B7_0s);
              ctx->base.is_jmp = DISAS_STOP;
          }
  #endif
+@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
-@@ -XXX,XX +XXX,XX @@ static void pgb_static(const char *image_name, abi_ulong orig_loaddr,
+     case 0xa000:        /* bra disp */
      CHECK_NOT_DELAY_SLOT
          ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
 -        ctx->envflags |= DELAY_SLOT;
 +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
      return;
      case 0xb000:        /* bsr disp */
      CHECK_NOT_DELAY_SLOT
          tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
          ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
 -        ctx->envflags |= DELAY_SLOT;
 +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
      return;
      }
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
      CHECK_NOT_DELAY_SLOT
          tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
          ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
 -        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
 +        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
      return;
      case 0x8900:        /* bt label */
      CHECK_NOT_DELAY_SLOT
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
      CHECK_NOT_DELAY_SLOT
          tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
          ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
 -        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
 +        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
      return;
      case 0x8800:        /* cmp/eq #imm,R0 */
          tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
      case 0x0023:        /* braf Rn */
      CHECK_NOT_DELAY_SLOT
          tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
 -        ctx->envflags |= DELAY_SLOT;
 +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
      ctx->delayed_pc = (uint32_t) - 1;
      return;
      case 0x0003:        /* bsrf Rn */
      CHECK_NOT_DELAY_SLOT
          tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
      tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
 -        ctx->envflags |= DELAY_SLOT;
 +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
      ctx->delayed_pc = (uint32_t) - 1;
      return;
      case 0x4015:        /* cmp/pl Rn */
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
      case 0x402b:        /* jmp @Rn */
      CHECK_NOT_DELAY_SLOT
      tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
 -        ctx->envflags |= DELAY_SLOT;
 +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
      ctx->delayed_pc = (uint32_t) - 1;
      return;
      case 0x400b:        /* jsr @Rn */
      CHECK_NOT_DELAY_SLOT
          tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
      tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
 -        ctx->envflags |= DELAY_SLOT;
 +        ctx->envflags |= TB_FLAG_DELAY_SLOT;
      ctx->delayed_pc = (uint32_t) - 1;
      return;
      case 0x400e:        /* ldc Rm,SR */
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
      fflush(stderr);
  #endif
   do_illegal:
 -    if (ctx->envflags & DELAY_SLOT_MASK) {
 +    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
   do_illegal_slot:
          gen_save_cpu_state(ctx, true);
          gen_helper_raise_slot_illegal_instruction(cpu_env);
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
   do_fpu_disabled:
      gen_save_cpu_state(ctx, true);
 -    if (ctx->envflags & DELAY_SLOT_MASK) {
 +    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
          gen_helper_raise_slot_fpu_disable(cpu_env);
      } else {
          gen_helper_raise_fpu_disable(cpu_env);
@@ -XXX,XX +XXX,XX @@ static void decode_opc(DisasContext * ctx)
      _decode_opc(ctx);
 -    if (old_flags & DELAY_SLOT_MASK) {
 +    if (old_flags & TB_FLAG_DELAY_SLOT_MASK) {
          /* go out of the delay slot */
 -        ctx->envflags &= ~DELAY_SLOT_MASK;
 +        ctx->envflags &= ~TB_FLAG_DELAY_SLOT_MASK;
          /* When in an exclusive region, we must continue to the end
             for conditional branches.  */
 -        if (ctx->tbflags & GUSA_EXCLUSIVE
 -            && old_flags & DELAY_SLOT_CONDITIONAL) {
 +        if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE
 +            && old_flags & TB_FLAG_DELAY_SLOT_COND) {
              gen_delayed_conditional_jump(ctx);
              return;
          }
          /* Otherwise this is probably an invalid gUSA region.
             Drop the GUSA bits so the next TB doesn't see them.  */
 -        ctx->envflags &= ~GUSA_MASK;
 +        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
          tcg_gen_movi_i32(cpu_flags, ctx->envflags);
 -        if (old_flags & DELAY_SLOT_CONDITIONAL) {
 +        if (old_flags & TB_FLAG_DELAY_SLOT_COND) {
          gen_delayed_conditional_jump(ctx);
          } else {
-             offset = -(HI_COMMPAGE & -align);
+             gen_jump(ctx);
@@ -XXX,XX +XXX,XX @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
      }
      /* The entire region has been translated.  */
 -    ctx->envflags &= ~GUSA_MASK;
 +    ctx->envflags &= ~TB_FLAG_GUSA_MASK;
      ctx->base.pc_next = pc_end;
      ctx->base.num_insns += max_insns - 1;
      return;
@@ -XXX,XX +XXX,XX @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
      /* Restart with the EXCLUSIVE bit set, within a TB run via
         cpu_exec_step_atomic holding the exclusive lock.  */
 -    ctx->envflags |= GUSA_EXCLUSIVE;
 +    ctx->envflags |= TB_FLAG_GUSA_EXCLUSIVE;
      gen_save_cpu_state(ctx, false);
      gen_helper_exclusive(cpu_env);
      ctx->base.is_jmp = DISAS_NORETURN;
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
                    (tbflags & (1 << SR_RB))) * 0x10;
      ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;
 -    if (tbflags & GUSA_MASK) {
 +#ifdef CONFIG_USER_ONLY
 +    if (tbflags & TB_FLAG_GUSA_MASK) {
 +        /* In gUSA exclusive region. */
          uint32_t pc = ctx->base.pc_next;
          uint32_t pc_end = ctx->base.tb->cs_base;
 -        int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
 +        int backup = sextract32(ctx->tbflags, TB_FLAG_GUSA_SHIFT, 8);
          int max_insns = (pc_end - pc) / 2;
          if (pc != pc_end + backup || max_insns < 2) {
              /* This is a malformed gUSA region.  Don't do anything special,
                 since the interpreter is likely to get confused.  */
 -            ctx->envflags &= ~GUSA_MASK;
 -        } else if (tbflags & GUSA_EXCLUSIVE) {
 +            ctx->envflags &= ~TB_FLAG_GUSA_MASK;
 +        } else if (tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
              /* Regardless of single-stepping or the end of the page,
                 we must complete execution of the gUSA region while
                 holding the exclusive lock.  */
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
              return;
          }
--    } else if (LO_COMMPAGE != 0) {
+     }
-+    } else if (LO_COMMPAGE != -1) {
++#endif
-         loaddr = MIN(loaddr, LO_COMMPAGE & -align);
-     }
+     /* Since the ISA is fixed-width, we can bound by the number
+        of instructions remaining on the page.  */
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
      DisasContext *ctx = container_of(dcbase, DisasContext, base);
  #ifdef CONFIG_USER_ONLY
 -    if (unlikely(ctx->envflags & GUSA_MASK)
 -        && !(ctx->envflags & GUSA_EXCLUSIVE)) {
 +    if (unlikely(ctx->envflags & TB_FLAG_GUSA_MASK)
 +        && !(ctx->envflags & TB_FLAG_GUSA_EXCLUSIVE)) {
          /* We're in an gUSA region, and we have not already fallen
             back on using an exclusive region.  Attempt to parse the
             region into a single supported atomic operation.  Failure
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
  {
      DisasContext *ctx = container_of(dcbase, DisasContext, base);
 -    if (ctx->tbflags & GUSA_EXCLUSIVE) {
 +    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
          /* Ending the region of exclusivity.  Clear the bits.  */
 -        ctx->envflags &= ~GUSA_MASK;
 +        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
      }
      switch (ctx->base.is_jmp) {
 --
 2.34.1

-[PULL 03/20] linux-user/x86_64: Allocate vsyscall page as a commpage
+Deleted patch
-We're about to start validating PAGE_EXEC, which means that we've
-got to mark the vsyscall page executable.  We had been special
-casing this entirely within translate.
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
-Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- linux-user/elfload.c | 23 +++++++++++++++++++++++
- 1 file changed, 23 insertions(+)
-diff --git a/linux-user/elfload.c b/linux-user/elfload.c
-index XXXXXXX..XXXXXXX 100644
---- a/linux-user/elfload.c
-+++ b/linux-user/elfload.c
-@@ -XXX,XX +XXX,XX @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en
-     (*regs)[26] = tswapreg(env->segs[R_GS].selector & 0xffff);
- }
-+#if ULONG_MAX >= TARGET_VSYSCALL_PAGE
-+#define INIT_GUEST_COMMPAGE
-+static bool init_guest_commpage(void)
-+{
-+    /*
-+     * The vsyscall page is at a high negative address aka kernel space,
-+     * which means that we cannot actually allocate it with target_mmap.
-+     * We still should be able to use page_set_flags, unless the user
-+     * has specified -R reserved_va, which would trigger an assert().
-+     */
-+    if (reserved_va != 0 &&
-+        TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE >= reserved_va) {
-+        error_report("Cannot allocate vsyscall page");
-+        exit(EXIT_FAILURE);
-+    }
-+    page_set_flags(TARGET_VSYSCALL_PAGE,
-+                   TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE,
-+                   PAGE_EXEC | PAGE_VALID);
-+    return true;
-+}
-+#endif
- #else
- #define ELF_START_MMAP 0x80000000
-@@ -XXX,XX +XXX,XX @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
- #else
- #define HI_COMMPAGE 0
- #define LO_COMMPAGE -1
-+#ifndef INIT_GUEST_COMMPAGE
- #define init_guest_commpage() true
- #endif
-+#endif
- static void pgb_fail_in_use(const char *image_name)
- {
---
-2.34.1

-[PULL 06/20] tests/tcg/i386: Move smc_code2 to an executable section
+Deleted patch
-We're about to start validating PAGE_EXEC, which means
-that we've got to put this code into a section that is
-both writable and executable.
-Note that this test did not run on hardware beforehand either.
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
-Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- tests/tcg/i386/test-i386.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-diff --git a/tests/tcg/i386/test-i386.c b/tests/tcg/i386/test-i386.c
-index XXXXXXX..XXXXXXX 100644
---- a/tests/tcg/i386/test-i386.c
-+++ b/tests/tcg/i386/test-i386.c
-@@ -XXX,XX +XXX,XX @@ uint8_t code[] = {
-     0xc3, /* ret */
- };
--asm(".section \".data\"\n"
-+asm(".section \".data_x\",\"awx\"\n"
-     "smc_code2:\n"
-     "movl 4(%esp), %eax\n"
-     "movl %eax, smc_patch_addr2 + 1\n"
---
-2.34.1

-[PULL 14/20] accel/tcg: Remove translator_ldsw
+Deleted patch
-The only user can easily use translator_lduw and
-adjust the type to signed during the return.
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
-Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- include/exec/translator.h   | 1 -
- target/i386/tcg/translate.c | 2 +-
- 2 files changed, 1 insertion(+), 2 deletions(-)
-diff --git a/include/exec/translator.h b/include/exec/translator.h
-index XXXXXXX..XXXXXXX 100644
---- a/include/exec/translator.h
-+++ b/include/exec/translator.h
-@@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest);
- #define FOR_EACH_TRANSLATOR_LD(F)                                       \
-     F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */)           \
--    F(translator_ldsw, int16_t, cpu_ldsw_code, bswap16)                 \
-     F(translator_lduw, uint16_t, cpu_lduw_code, bswap16)                \
-     F(translator_ldl, uint32_t, cpu_ldl_code, bswap32)                  \
-     F(translator_ldq, uint64_t, cpu_ldq_code, bswap64)
-diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/i386/tcg/translate.c
-+++ b/target/i386/tcg/translate.c
-@@ -XXX,XX +XXX,XX @@ static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
- static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
- {
--    return translator_ldsw(env, &s->base, advance_pc(env, s, 2));
-+    return translator_lduw(env, &s->base, advance_pc(env, s, 2));
- }
- static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
---
-2.34.1

-[PULL 19/20] target/riscv: Add MAX_INSN_LEN and insn_len
+Deleted patch
-These will be useful in properly ending the TB.
-Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
-Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
-Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
-Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
----
- target/riscv/translate.c | 10 +++++++++-
- 1 file changed, 9 insertions(+), 1 deletion(-)
-diff --git a/target/riscv/translate.c b/target/riscv/translate.c
-index XXXXXXX..XXXXXXX 100644
---- a/target/riscv/translate.c
-+++ b/target/riscv/translate.c
-@@ -XXX,XX +XXX,XX @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
- /* Include decoders for factored-out extensions */
- #include "decode-XVentanaCondOps.c.inc"
-+/* The specification allows for longer insns, but not supported by qemu. */
-+#define MAX_INSN_LEN  4
-+
-+static inline int insn_len(uint16_t first_word)
-+{
-+    return (first_word & 3) == 3 ? 4 : 2;
-+}
-+
- static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
- {
-     /*
-@@ -XXX,XX +XXX,XX @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
-     };
-     /* Check for compressed insn */
--    if (extract16(opcode, 0, 2) != 3) {
-+    if (insn_len(opcode) == 2) {
-         if (!has_ext(ctx, RVC)) {
-             gen_exception_illegal(ctx);
-         } else {
---
-2.34.1

The following changes since commit e93ded1bf6c94ab95015b33e188bc8b0b0c32670:

Merge tag 'testing-pull-request-2022-08-30' of https://gitlab.com/thuth/qemu into staging (2022-08-31 18:19:03 -0400)

are available in the Git repository at:

https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220901

for you to fetch changes up to 20011be2e30b8aa8ef1fc258485f00c688703deb:

target/riscv: Make translator stop before the end of a page (2022-09-01 07:43:08 +0100)

----------------------------------------------------------------
Respect PROT_EXEC in user-only mode.
Fix s390x, i386 and riscv for translations crossing a page.

----------------------------------------------------------------
Ilya Leoshkevich (4):
      linux-user: Clear translations on mprotect()
      accel/tcg: Introduce is_same_page()
      target/s390x: Make translator stop before the end of a page
      target/i386: Make translator stop before the end of a page

Richard Henderson (16):
      linux-user/arm: Mark the commpage executable
      linux-user/hppa: Allocate page zero as a commpage
      linux-user/x86_64: Allocate vsyscall page as a commpage
      linux-user: Honor PT_GNU_STACK
      tests/tcg/i386: Move smc_code2 to an executable section
      accel/tcg: Properly implement get_page_addr_code for user-only
      accel/tcg: Unlock mmap_lock after longjmp
      accel/tcg: Make tb_htable_lookup static
      accel/tcg: Move qemu_ram_addr_from_host_nofail to physmem.c
      accel/tcg: Use probe_access_internal for softmmu get_page_addr_code_hostp
      accel/tcg: Document the faulting lookup in tb_lookup_cmp
      accel/tcg: Remove translator_ldsw
      accel/tcg: Add pc and host_pc params to gen_intermediate_code
      accel/tcg: Add fast path for translator_ld*
      target/riscv: Add MAX_INSN_LEN and insn_len
      target/riscv: Make translator stop before the end of a page

include/elf.h                     |   1 +
 include/exec/cpu-common.h         |   1 +
 include/exec/exec-all.h           |  89 ++++++++----------------
 include/exec/translator.h         |  96 ++++++++++++++++---------
 linux-user/arm/target_cpu.h       |   4 +-
 linux-user/qemu.h                 |   1 +
 accel/tcg/cpu-exec.c              | 143 ++++++++++++++++++++------------------
 accel/tcg/cputlb.c                |  93 +++++++------------------
 accel/tcg/translate-all.c         |  29 ++++----
 accel/tcg/translator.c            | 135 ++++++++++++++++++++++++++---------
 accel/tcg/user-exec.c             |  17 ++++-
 linux-user/elfload.c              |  82 ++++++++++++++++++++--
 linux-user/mmap.c                 |   6 +-
 softmmu/physmem.c                 |  12 ++++
 target/alpha/translate.c          |   5 +-
 target/arm/translate.c            |   5 +-
 target/avr/translate.c            |   5 +-
 target/cris/translate.c           |   5 +-
 target/hexagon/translate.c        |   6 +-
 target/hppa/translate.c           |   5 +-
 target/i386/tcg/translate.c       |  71 +++++++++++--------
 target/loongarch/translate.c      |   6 +-
 target/m68k/translate.c           |   5 +-
 target/microblaze/translate.c     |   5 +-
 target/mips/tcg/translate.c       |   5 +-
 target/nios2/translate.c          |   5 +-
 target/openrisc/translate.c       |   6 +-
 target/ppc/translate.c            |   5 +-
 target/riscv/translate.c          |  32 +++++++--
 target/rx/translate.c             |   5 +-
 target/s390x/tcg/translate.c      |  20 ++++--
 target/sh4/translate.c            |   5 +-
 target/sparc/translate.c          |   5 +-
 target/tricore/translate.c        |   6 +-
 target/xtensa/translate.c         |   6 +-
 tests/tcg/i386/test-i386.c        |   2 +-
 tests/tcg/riscv64/noexec.c        |  79 +++++++++++++++++++++
 tests/tcg/s390x/noexec.c          | 106 ++++++++++++++++++++++++++++
 tests/tcg/x86_64/noexec.c         |  75 ++++++++++++++++++++
 tests/tcg/multiarch/noexec.c.inc  | 139 ++++++++++++++++++++++++++++++++++++
 tests/tcg/riscv64/Makefile.target |   1 +
 tests/tcg/s390x/Makefile.target   |   1 +
 tests/tcg/x86_64/Makefile.target  |   3 +-
 43 files changed, 966 insertions(+), 367 deletions(-)
 create mode 100644 tests/tcg/riscv64/noexec.c
 create mode 100644 tests/tcg/s390x/noexec.c
 create mode 100644 tests/tcg/x86_64/noexec.c
 create mode 100644 tests/tcg/multiarch/noexec.c.inc

We're about to start validating PAGE_EXEC, which means
that we've got to mark the commpage executable.  We had
been placing the commpage outside of reserved_va, which
was incorrect and led to an abort.

Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 linux-user/arm/target_cpu.h | 4 ++--
 linux-user/elfload.c        | 6 +++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/linux-user/arm/target_cpu.h b/linux-user/arm/target_cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/linux-user/arm/target_cpu.h
+++ b/linux-user/arm/target_cpu.h
@@ -XXX,XX +XXX,XX @@ static inline unsigned long arm_max_reserved_va(CPUState *cs)
     } else {
         /*
          * We need to be able to map the commpage.
-         * See validate_guest_space in linux-user/elfload.c.
+         * See init_guest_commpage in linux-user/elfload.c.
          */
-        return 0xffff0000ul;
+        return 0xfffffffful;
     }
 }
 #define MAX_RESERVED_VA  arm_max_reserved_va
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index XXXXXXX..XXXXXXX 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -XXX,XX +XXX,XX @@ enum {
 
 static bool init_guest_commpage(void)
 {
-    void *want = g2h_untagged(HI_COMMPAGE & -qemu_host_page_size);
+    abi_ptr commpage = HI_COMMPAGE & -qemu_host_page_size;
+    void *want = g2h_untagged(commpage);
     void *addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE,
                       MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
 
@@ -XXX,XX +XXX,XX @@ static bool init_guest_commpage(void)
         perror("Protecting guest commpage");
         exit(EXIT_FAILURE);
     }
+
+    page_set_flags(commpage, commpage + qemu_host_page_size,
+                   PAGE_READ | PAGE_EXEC | PAGE_VALID);
     return true;
 }
 
-- 
2.34.1

While there are no target-specific nonfaulting probes,
generic code may grow some uses at some point.

Note that the attrs argument was incorrect -- it should have
been MEMTXATTRS_UNSPECIFIED. Just use the simpler interface.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/avr/helper.c | 46 ++++++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/target/avr/helper.c b/target/avr/helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -XXX,XX +XXX,XX @@ bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
                       MMUAccessType access_type, int mmu_idx,
                       bool probe, uintptr_t retaddr)
 {
-    int prot = 0;
-    MemTxAttrs attrs = {};
+    int prot, page_size = TARGET_PAGE_SIZE;
     uint32_t paddr;
 
     address &= TARGET_PAGE_MASK;
 
     if (mmu_idx == MMU_CODE_IDX) {
-        /* access to code in flash */
+        /* Access to code in flash. */
         paddr = OFFSET_CODE + address;
         prot = PAGE_READ | PAGE_EXEC;
-        if (paddr + TARGET_PAGE_SIZE > OFFSET_DATA) {
+        if (paddr >= OFFSET_DATA) {
+            /*
+             * This should not be possible via any architectural operations.
+             * There is certainly not an exception that we can deliver.
+             * Accept probing that might come from generic code.
+             */
+            if (probe) {
+                return false;
+            }
             error_report("execution left flash memory");
             abort();
         }
-    } else if (address < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) {
-        /*
-         * access to CPU registers, exit and rebuilt this TB to use full access
-         * incase it touches specially handled registers like SREG or SP
-         */
-        AVRCPU *cpu = AVR_CPU(cs);
-        CPUAVRState *env = &cpu->env;
-        env->fullacc = 1;
-        cpu_loop_exit_restore(cs, retaddr);
     } else {
-        /* access to memory. nothing special */
+        /* Access to memory. */
         paddr = OFFSET_DATA + address;
         prot = PAGE_READ | PAGE_WRITE;
+        if (address < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) {
+            /*
+             * Access to CPU registers, exit and rebuilt this TB to use
+             * full access in case it touches specially handled registers
+             * like SREG or SP.  For probing, set page_size = 1, in order
+             * to force tlb_fill to be called for the next access.
+             */
+            if (probe) {
+                page_size = 1;
+            } else {
+                AVRCPU *cpu = AVR_CPU(cs);
+                CPUAVRState *env = &cpu->env;
+                env->fullacc = 1;
+                cpu_loop_exit_restore(cs, retaddr);
+            }
+        }
     }
 
-    tlb_set_page_with_attrs(cs, address, paddr, attrs, prot,
-                            mmu_idx, TARGET_PAGE_SIZE);
-
+    tlb_set_page(cs, address, paddr, prot, mmu_idx, page_size);
     return true;
 }
 
-- 
2.34.1

There is no need to go through cc->tcg_ops when
we know what value that must have.

Reviewed-by: Michael Rolnik <mrolnik@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/avr/helper.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/target/avr/helper.c b/target/avr/helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -XXX,XX +XXX,XX @@
 bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
 {
     bool ret = false;
-    CPUClass *cc = CPU_GET_CLASS(cs);
     AVRCPU *cpu = AVR_CPU(cs);
     CPUAVRState *env = &cpu->env;
 
     if (interrupt_request & CPU_INTERRUPT_RESET) {
         if (cpu_interrupts_enabled(env)) {
             cs->exception_index = EXCP_RESET;
-            cc->tcg_ops->do_interrupt(cs);
+            avr_cpu_do_interrupt(cs);
 
             cs->interrupt_request &= ~CPU_INTERRUPT_RESET;
 
@@ -XXX,XX +XXX,XX @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
         if (cpu_interrupts_enabled(env) && env->intsrc != 0) {
             int index = ctz32(env->intsrc);
             cs->exception_index = EXCP_INT(index);
-            cc->tcg_ops->do_interrupt(cs);
+            avr_cpu_do_interrupt(cs);
 
             env->intsrc &= env->intsrc - 1; /* clear the interrupt */
             if (!env->intsrc) {
-- 
2.34.1

We're about to start validating PAGE_EXEC, which means that we've
got to mark page zero executable.  We had been special casing this
entirely within translate.

Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 linux-user/elfload.c | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index XXXXXXX..XXXXXXX 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -XXX,XX +XXX,XX @@ static inline void init_thread(struct target_pt_regs *regs,
     regs->gr[31] = infop->entry;
 }
 
+#define LO_COMMPAGE  0
+
+static bool init_guest_commpage(void)
+{
+    void *want = g2h_untagged(LO_COMMPAGE);
+    void *addr = mmap(want, qemu_host_page_size, PROT_NONE,
+                      MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+
+    if (addr == MAP_FAILED) {
+        perror("Allocating guest commpage");
+        exit(EXIT_FAILURE);
+    }
+    if (addr != want) {
+        return false;
+    }
+
+    /*
+     * On Linux, page zero is normally marked execute only + gateway.
+     * Normal read or write is supposed to fail (thus PROT_NONE above),
+     * but specific offsets have kernel code mapped to raise permissions
+     * and implement syscalls.  Here, simply mark the page executable.
+     * Special case the entry points during translation (see do_page_zero).
+     */
+    page_set_flags(LO_COMMPAGE, LO_COMMPAGE + TARGET_PAGE_SIZE,
+                   PAGE_EXEC | PAGE_VALID);
+    return true;
+}
+
 #endif /* TARGET_HPPA */
 
 #ifdef TARGET_XTENSA
@@ -XXX,XX +XXX,XX @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
 }
 
 #if defined(HI_COMMPAGE)
-#define LO_COMMPAGE 0
+#define LO_COMMPAGE -1
 #elif defined(LO_COMMPAGE)
 #define HI_COMMPAGE 0
 #else
 #define HI_COMMPAGE 0
-#define LO_COMMPAGE 0
+#define LO_COMMPAGE -1
 #define init_guest_commpage() true
 #endif
 
@@ -XXX,XX +XXX,XX @@ static void pgb_static(const char *image_name, abi_ulong orig_loaddr,
         } else {
             offset = -(HI_COMMPAGE & -align);
         }
-    } else if (LO_COMMPAGE != 0) {
+    } else if (LO_COMMPAGE != -1) {
         loaddr = MIN(loaddr, LO_COMMPAGE & -align);
     }
 
-- 
2.34.1

We're about to start validating PAGE_EXEC, which means that we've
got to mark the vsyscall page executable.  We had been special
casing this entirely within translate.

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index XXXXXXX..XXXXXXX 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -XXX,XX +XXX,XX @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en
     (*regs)[26] = tswapreg(env->segs[R_GS].selector & 0xffff);
 }
 
+#if ULONG_MAX >= TARGET_VSYSCALL_PAGE
+#define INIT_GUEST_COMMPAGE
+static bool init_guest_commpage(void)
+{
+    /*
+     * The vsyscall page is at a high negative address aka kernel space,
+     * which means that we cannot actually allocate it with target_mmap.
+     * We still should be able to use page_set_flags, unless the user
+     * has specified -R reserved_va, which would trigger an assert().
+     */
+    if (reserved_va != 0 &&
+        TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE >= reserved_va) {
+        error_report("Cannot allocate vsyscall page");
+        exit(EXIT_FAILURE);
+    }
+    page_set_flags(TARGET_VSYSCALL_PAGE,
+                   TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE,
+                   PAGE_EXEC | PAGE_VALID);
+    return true;
+}
+#endif
 #else
 
 #define ELF_START_MMAP 0x80000000
@@ -XXX,XX +XXX,XX @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
 #else
 #define HI_COMMPAGE 0
 #define LO_COMMPAGE -1
+#ifndef INIT_GUEST_COMMPAGE
 #define init_guest_commpage() true
 #endif
+#endif
 
 static void pgb_fail_in_use(const char *image_name)
 {
-- 
2.34.1

We cannot deliver two interrupts simultaneously;
the first interrupt handler must execute first.

Reviewed-by: Michael Rolnik <mrolnik@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/avr/helper.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

The skip bit (env->skip) is not saved across interrupts, so we
must delay delivering the interrupt until the skip has been
processed.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1118
Reviewed-by: Michael Rolnik <mrolnik@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/avr/helper.c    |  9 +++++++++
 target/avr/translate.c | 26 ++++++++++++++++++++++----
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/target/avr/helper.c b/target/avr/helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/avr/helper.c
+++ b/target/avr/helper.c
@@ -XXX,XX +XXX,XX @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
     AVRCPU *cpu = AVR_CPU(cs);
     CPUAVRState *env = &cpu->env;
 
+    /*
+     * We cannot separate a skip from the next instruction,
+     * as the skip would not be preserved across the interrupt.
+     * Separating the two insn normally only happens at page boundaries.
+     */
+    if (env->skip) {
+        return false;
+    }
+
     if (interrupt_request & CPU_INTERRUPT_RESET) {
         if (cpu_interrupts_enabled(env)) {
             cs->exception_index = EXCP_RESET;
diff --git a/target/avr/translate.c b/target/avr/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/avr/translate.c
+++ b/target/avr/translate.c
@@ -XXX,XX +XXX,XX @@ static void avr_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     if (skip_label) {
         canonicalize_skip(ctx);
         gen_set_label(skip_label);
-        if (ctx->base.is_jmp == DISAS_NORETURN) {
+
+        switch (ctx->base.is_jmp) {
+        case DISAS_NORETURN:
             ctx->base.is_jmp = DISAS_CHAIN;
+            break;
+        case DISAS_NEXT:
+            if (ctx->base.tb->flags & TB_FLAGS_SKIP) {
+                ctx->base.is_jmp = DISAS_TOO_MANY;
+            }
+            break;
+        default:
+            break;
         }
     }
 
@@ -XXX,XX +XXX,XX @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
     bool nonconst_skip = canonicalize_skip(ctx);
+    /*
+     * Because we disable interrupts while env->skip is set,
+     * we must return to the main loop to re-evaluate afterward.
+     */
+    bool force_exit = ctx->base.tb->flags & TB_FLAGS_SKIP;
 
     switch (ctx->base.is_jmp) {
     case DISAS_NORETURN:
@@ -XXX,XX +XXX,XX @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
     case DISAS_NEXT:
     case DISAS_TOO_MANY:
     case DISAS_CHAIN:
-        if (!nonconst_skip) {
+        if (!nonconst_skip && !force_exit) {
             /* Note gen_goto_tb checks singlestep.  */
             gen_goto_tb(ctx, 1, ctx->npc);
             break;
@@ -XXX,XX +XXX,XX @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
         tcg_gen_movi_tl(cpu_pc, ctx->npc);
         /* fall through */
     case DISAS_LOOKUP:
-        tcg_gen_lookup_and_goto_ptr();
-        break;
+        if (!force_exit) {
+            tcg_gen_lookup_and_goto_ptr();
+            break;
+        }
+        /* fall through */
     case DISAS_EXIT:
         tcg_gen_exit_tb(NULL, 0);
         break;
-- 
2.34.1

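Map the stack executable if the target requires it by default
(EXSTACK_DEFAULT) or if the binary requests it via the PF_X flag
of its PT_GNU_STACK program header.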

Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/elf.h        |  1 +
 linux-user/qemu.h    |  1 +
 linux-user/elfload.c | 19 ++++++++++++++++++-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/elf.h b/include/elf.h
index XXXXXXX..XXXXXXX 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -XXX,XX +XXX,XX @@ typedef int64_t  Elf64_Sxword;
 #define PT_LOPROC  0x70000000
 #define PT_HIPROC  0x7fffffff
 
+#define PT_GNU_STACK      (PT_LOOS + 0x474e551)
 #define PT_GNU_PROPERTY   (PT_LOOS + 0x474e553)
 
 #define PT_MIPS_REGINFO   0x70000000
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index XXXXXXX..XXXXXXX 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -XXX,XX +XXX,XX @@ struct image_info {
         uint32_t        elf_flags;
         int             personality;
         abi_ulong       alignment;
+        bool            exec_stack;
 
         /* Generic semihosting knows about these pointers. */
         abi_ulong       arg_strings;   /* strings for argv */
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index XXXXXXX..XXXXXXX 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -XXX,XX +XXX,XX @@ static bool init_guest_commpage(void)
 #define ELF_ARCH        EM_386
 
 #define ELF_PLATFORM get_elf_platform()
+#define EXSTACK_DEFAULT true
 
 static const char *get_elf_platform(void)
 {
@@ -XXX,XX +XXX,XX @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en
 
 #define ELF_ARCH        EM_ARM
 #define ELF_CLASS       ELFCLASS32
+#define EXSTACK_DEFAULT true
 
 static inline void init_thread(struct target_pt_regs *regs,
                                struct image_info *infop)
@@ -XXX,XX +XXX,XX @@ static inline void init_thread(struct target_pt_regs *regs,
 #else
 
 #define ELF_CLASS       ELFCLASS32
+#define EXSTACK_DEFAULT true
 
 #endif
 
@@ -XXX,XX +XXX,XX @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en
 
 #define ELF_CLASS   ELFCLASS64
 #define ELF_ARCH    EM_LOONGARCH
+#define EXSTACK_DEFAULT true
 
 #define elf_check_arch(x) ((x) == EM_LOONGARCH)
 
@@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap(void)
 #define ELF_CLASS   ELFCLASS32
 #endif
 #define ELF_ARCH    EM_MIPS
+#define EXSTACK_DEFAULT true
 
 #ifdef TARGET_ABI_MIPSN32
 #define elf_check_abi(x) ((x) & EF_MIPS_ABI2)
@@ -XXX,XX +XXX,XX @@ static inline void init_thread(struct target_pt_regs *regs,
 #define bswaptls(ptr) bswap32s(ptr)
 #endif
 
+#ifndef EXSTACK_DEFAULT
+#define EXSTACK_DEFAULT false
+#endif
+
 #include "elf.h"
 
 /* We must delay the following stanzas until after "elf.h". */
@@ -XXX,XX +XXX,XX @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm,
                                  struct image_info *info)
 {
     abi_ulong size, error, guard;
+    int prot;
 
     size = guest_stack_size;
     if (size < STACK_LOWER_LIMIT) {
@@ -XXX,XX +XXX,XX @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm,
         guard = qemu_real_host_page_size();
     }
 
-    error = target_mmap(0, size + guard, PROT_READ | PROT_WRITE,
+    prot = PROT_READ | PROT_WRITE;
+    if (info->exec_stack) {
+        prot |= PROT_EXEC;
+    }
+    error = target_mmap(0, size + guard, prot,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     if (error == -1) {
         perror("mmap stack");
@@ -XXX,XX +XXX,XX @@ static void load_elf_image(const char *image_name, int image_fd,
      */
     loaddr = -1, hiaddr = 0;
     info->alignment = 0;
+    info->exec_stack = EXSTACK_DEFAULT;
     for (i = 0; i < ehdr->e_phnum; ++i) {
         struct elf_phdr *eppnt = phdr + i;
         if (eppnt->p_type == PT_LOAD) {
@@ -XXX,XX +XXX,XX @@ static void load_elf_image(const char *image_name, int image_fd,
             if (!parse_elf_properties(image_fd, info, eppnt, bprm_buf, &err)) {
                 goto exit_errmsg;
             }
+        } else if (eppnt->p_type == PT_GNU_STACK) {
+            info->exec_stack = eppnt->p_flags & PF_X;
         }
     }
 
-- 
2.34.1

From: Ilya Leoshkevich <iii@linux.ibm.com>

Introduce a function that checks whether a given address is on the same
page as where disassembly started. Having it improves readability of
the following patches.

Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20220811095534.241224-3-iii@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
[rth: Make the DisasContextBase parameter const.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/translator.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/exec/translator.h b/include/exec/translator.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -XXX,XX +XXX,XX @@ FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
 
 #undef GEN_TRANSLATOR_LD
 
+/*
+ * Return whether addr is on the same page as where disassembly started.
+ * Translators can use this to enforce the rule that only single-insn
+ * translation blocks are allowed to cross page boundaries.
+ */
+static inline bool is_same_page(const DisasContextBase *db, target_ulong addr)
+{
+    return ((addr ^ db->pc_first) & TARGET_PAGE_MASK) == 0;
+}
+
 #endif /* EXEC__TRANSLATOR_H */
-- 
2.34.1

The current user-mode implementation of get_page_addr_code is a
no-op, simply returning addr.  This is incorrect, because we ought
to be checking the page permissions for execution.

Make get_page_addr_code inline for both implementations.

Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/exec-all.h | 85 ++++++++++++++---------------------------
 accel/tcg/cputlb.c      |  5 ---
 accel/tcg/user-exec.c   | 14 +++++++
 3 files changed, 42 insertions(+), 62 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -XXX,XX +XXX,XX @@ struct MemoryRegionSection *iotlb_to_section(CPUState *cpu,
                                              hwaddr index, MemTxAttrs attrs);
 #endif
 
-#if defined(CONFIG_USER_ONLY)
-void mmap_lock(void);
-void mmap_unlock(void);
-bool have_mmap_lock(void);
-
 /**
- * get_page_addr_code() - user-mode version
+ * get_page_addr_code_hostp()
  * @env: CPUArchState
  * @addr: guest virtual address of guest code
  *
- * Returns @addr.
+ * See get_page_addr_code() (full-system version) for documentation on the
+ * return value.
+ *
+ * Sets *@hostp (when @hostp is non-NULL) as follows.
+ * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp
+ * to the host address where @addr's content is kept.
+ *
+ * Note: this function can trigger an exception.
+ */
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
+                                        void **hostp);
+
+/**
+ * get_page_addr_code()
+ * @env: CPUArchState
+ * @addr: guest virtual address of guest code
+ *
+ * If we cannot translate and execute from the entire RAM page, or if
+ * the region is not backed by RAM, returns -1. Otherwise, returns the
+ * ram_addr_t corresponding to the guest code at @addr.
+ *
+ * Note: this function can trigger an exception.
  */
 static inline tb_page_addr_t get_page_addr_code(CPUArchState *env,
                                                 target_ulong addr)
 {
-    return addr;
+    return get_page_addr_code_hostp(env, addr, NULL);
 }
 
-/**
- * get_page_addr_code_hostp() - user-mode version
- * @env: CPUArchState
- * @addr: guest virtual address of guest code
- *
- * Returns @addr.
- *
- * If @hostp is non-NULL, sets *@hostp to the host address where @addr's content
- * is kept.
- */
-static inline tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env,
-                                                      target_ulong addr,
-                                                      void **hostp)
-{
-    if (hostp) {
-        *hostp = g2h_untagged(addr);
-    }
-    return addr;
-}
+#if defined(CONFIG_USER_ONLY)
+void mmap_lock(void);
+void mmap_unlock(void);
+bool have_mmap_lock(void);
 
 /**
  * adjust_signal_pc:
@@ -XXX,XX +XXX,XX @@ G_NORETURN void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr,
 static inline void mmap_lock(void) {}
 static inline void mmap_unlock(void) {}
 
-/**
- * get_page_addr_code() - full-system version
- * @env: CPUArchState
- * @addr: guest virtual address of guest code
- *
- * If we cannot translate and execute from the entire RAM page, or if
- * the region is not backed by RAM, returns -1. Otherwise, returns the
- * ram_addr_t corresponding to the guest code at @addr.
- *
- * Note: this function can trigger an exception.
- */
-tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr);
-
-/**
- * get_page_addr_code_hostp() - full-system version
- * @env: CPUArchState
- * @addr: guest virtual address of guest code
- *
- * See get_page_addr_code() (full-system version) for documentation on the
- * return value.
- *
- * Sets *@hostp (when @hostp is non-NULL) as follows.
- * If the return value is -1, sets *@hostp to NULL. Otherwise, sets *@hostp
- * to the host address where @addr's content is kept.
- *
- * Note: this function can trigger an exception.
- */
-tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
-                                        void **hostp);
-
 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length);
 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr);
 
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
     return qemu_ram_addr_from_host_nofail(p);
 }
 
-tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
-{
-    return get_page_addr_code_hostp(env, addr, NULL);
-}
-
 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
                            CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
 {
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
     return size ? g2h(env_cpu(env), addr) : NULL;
 }
 
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
+                                        void **hostp)
+{
+    int flags;
+
+    flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
+    g_assert(flags == 0);
+
+    if (hostp) {
+        *hostp = g2h_untagged(addr);
+    }
+    return addr;
+}
+
 /* The softmmu versions of these helpers are in cputlb.c.  */
 
 /*
-- 
2.34.1

The mmap_lock is held around tb_gen_code.  While the comment
is correct that the lock is dropped when tb_gen_code runs out
of memory, the lock is *not* dropped when an exception is
raised reading code for translation.

Acked-by: Alistair Francis <alistair.francis@wdc.com>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cpu-exec.c  | 12 ++++++------
 accel/tcg/user-exec.c |  3 ---
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu)
         cpu_tb_exec(cpu, tb, &tb_exit);
         cpu_exec_exit(cpu);
     } else {
-        /*
-         * The mmap_lock is dropped by tb_gen_code if it runs out of
-         * memory.
-         */
 #ifndef CONFIG_SOFTMMU
         clear_helper_retaddr();
-        tcg_debug_assert(!have_mmap_lock());
+        if (have_mmap_lock()) {
+            mmap_unlock();
+        }
 #endif
         if (qemu_mutex_iothread_locked()) {
             qemu_mutex_unlock_iothread();
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
 
 #ifndef CONFIG_SOFTMMU
         clear_helper_retaddr();
-        tcg_debug_assert(!have_mmap_lock());
+        if (have_mmap_lock()) {
+            mmap_unlock();
+        }
 #endif
         if (qemu_mutex_iothread_locked()) {
             qemu_mutex_unlock_iothread();
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -XXX,XX +XXX,XX @@ MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
          * (and if the translator doesn't handle page boundaries correctly
          * there's little we can do about that here).  Therefore, do not
          * trigger the unwinder.
-         *
-         * Like tb_gen_code, release the memory lock before cpu_loop_exit.
          */
-        mmap_unlock();
         *pc = 0;
         return MMU_INST_FETCH;
     }
-- 
2.34.1

tb_htable_lookup is not used outside of cpu-exec.c.  Make it static
and move it and its subroutines up in the file, before the first use.

Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/exec-all.h |   3 -
 accel/tcg/cpu-exec.c    | 122 ++++++++++++++++++++--------------------
 2 files changed, 61 insertions(+), 64 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs);
 #endif
 void tb_flush(CPUState *cpu);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
-TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
-                                   target_ulong cs_base, uint32_t flags,
-                                   uint32_t cflags);
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
 
 /* GETPC is the true target of the return instruction that we'll execute.  */
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -XXX,XX +XXX,XX @@ uint32_t curr_cflags(CPUState *cpu)
     return cflags;
 }
 
+struct tb_desc {
+    target_ulong pc;
+    target_ulong cs_base;
+    CPUArchState *env;
+    tb_page_addr_t phys_page1;
+    uint32_t flags;
+    uint32_t cflags;
+    uint32_t trace_vcpu_dstate;
+};
+
+static bool tb_lookup_cmp(const void *p, const void *d)
+{
+    const TranslationBlock *tb = p;
+    const struct tb_desc *desc = d;
+
+    if (tb->pc == desc->pc &&
+        tb->page_addr[0] == desc->phys_page1 &&
+        tb->cs_base == desc->cs_base &&
+        tb->flags == desc->flags &&
+        tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
+        tb_cflags(tb) == desc->cflags) {
+        /* check next page if needed */
+        if (tb->page_addr[1] == -1) {
+            return true;
+        } else {
+            tb_page_addr_t phys_page2;
+            target_ulong virt_page2;
+
+            virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
+            phys_page2 = get_page_addr_code(desc->env, virt_page2);
+            if (tb->page_addr[1] == phys_page2) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
+                                          target_ulong cs_base, uint32_t flags,
+                                          uint32_t cflags)
+{
+    tb_page_addr_t phys_pc;
+    struct tb_desc desc;
+    uint32_t h;
+
+    desc.env = cpu->env_ptr;
+    desc.cs_base = cs_base;
+    desc.flags = flags;
+    desc.cflags = cflags;
+    desc.trace_vcpu_dstate = *cpu->trace_dstate;
+    desc.pc = pc;
+    phys_pc = get_page_addr_code(desc.env, pc);
+    if (phys_pc == -1) {
+        return NULL;
+    }
+    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
+    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
+    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
+}
+
 /* Might cause an exception, so have a longjmp destination ready */
 static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
                                           target_ulong cs_base,
@@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu)
     end_exclusive();
 }
 
-struct tb_desc {
-    target_ulong pc;
-    target_ulong cs_base;
-    CPUArchState *env;
-    tb_page_addr_t phys_page1;
-    uint32_t flags;
-    uint32_t cflags;
-    uint32_t trace_vcpu_dstate;
-};
-
-static bool tb_lookup_cmp(const void *p, const void *d)
-{
-    const TranslationBlock *tb = p;
-    const struct tb_desc *desc = d;
-
-    if (tb->pc == desc->pc &&
-        tb->page_addr[0] == desc->phys_page1 &&
-        tb->cs_base == desc->cs_base &&
-        tb->flags == desc->flags &&
-        tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
-        tb_cflags(tb) == desc->cflags) {
-        /* check next page if needed */
-        if (tb->page_addr[1] == -1) {
-            return true;
-        } else {
-            tb_page_addr_t phys_page2;
-            target_ulong virt_page2;
-
-            virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
-            phys_page2 = get_page_addr_code(desc->env, virt_page2);
-            if (tb->page_addr[1] == phys_page2) {
-                return true;
-            }
-        }
-    }
-    return false;
-}
-
-TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
-                                   target_ulong cs_base, uint32_t flags,
-                                   uint32_t cflags)
-{
-    tb_page_addr_t phys_pc;
-    struct tb_desc desc;
-    uint32_t h;
-
-    desc.env = cpu->env_ptr;
-    desc.cs_base = cs_base;
-    desc.flags = flags;
-    desc.cflags = cflags;
-    desc.trace_vcpu_dstate = *cpu->trace_dstate;
-    desc.pc = pc;
-    phys_pc = get_page_addr_code(desc.env, pc);
-    if (phys_pc == -1) {
-        return NULL;
-    }
-    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
-    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
-    return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
-}
-
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
 {
     if (TCG_TARGET_HAS_direct_jump) {
-- 
2.34.1

The base qemu_ram_addr_from_host function is already in
softmmu/physmem.c; move the nofail version to be adjacent.

Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu-common.h |  1 +
 accel/tcg/cputlb.c        | 12 ------------
 softmmu/physmem.c         | 12 ++++++++++++
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -XXX,XX +XXX,XX @@ typedef uintptr_t ram_addr_t;
 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
 /* This should not be used by devices.  */
 ram_addr_t qemu_ram_addr_from_host(void *ptr);
+ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
 RAMBlock *qemu_ram_block_by_name(const char *name);
 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
                                    ram_addr_t *offset);
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                             prot, mmu_idx, size);
 }
 
-static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
-{
-    ram_addr_t ram_addr;
-
-    ram_addr = qemu_ram_addr_from_host(ptr);
-    if (ram_addr == RAM_ADDR_INVALID) {
-        error_report("Bad ram pointer %p", ptr);
-        abort();
-    }
-    return ram_addr;
-}
-
 /*
  * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
  * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index XXXXXXX..XXXXXXX 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -XXX,XX +XXX,XX @@ ram_addr_t qemu_ram_addr_from_host(void *ptr)
     return block->offset + offset;
 }
 
+ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
+{
+    ram_addr_t ram_addr;
+
+    ram_addr = qemu_ram_addr_from_host(ptr);
+    if (ram_addr == RAM_ADDR_INVALID) {
+        error_report("Bad ram pointer %p", ptr);
+        abort();
+    }
+    return ram_addr;
+}
+
 static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
                                  MemTxAttrs attrs, void *buf, hwaddr len);
 static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
-- 
2.34.1

Simplify the implementation of get_page_addr_code_hostp
by reusing the existing probe_access infrastructure.

Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 76 ++++++++++++++++------------------------------
 1 file changed, 26 insertions(+), 50 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
   victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
                  (ADDR) & TARGET_PAGE_MASK)
 
-/*
- * Return a ram_addr_t for the virtual address for execution.
- *
- * Return -1 if we can't translate and execute from an entire page
- * of RAM.  This will force us to execute by loading and translating
- * one insn at a time, without caching.
- *
- * NOTE: This function will trigger an exception if the page is
- * not executable.
- */
-tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
-                                        void **hostp)
-{
-    uintptr_t mmu_idx = cpu_mmu_index(env, true);
-    uintptr_t index = tlb_index(env, mmu_idx, addr);
-    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
-    void *p;
-
-    if (unlikely(!tlb_hit(entry->addr_code, addr))) {
-        if (!VICTIM_TLB_HIT(addr_code, addr)) {
-            tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
-            index = tlb_index(env, mmu_idx, addr);
-            entry = tlb_entry(env, mmu_idx, addr);
-
-            if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
-                /*
-                 * The MMU protection covers a smaller range than a target
-                 * page, so we must redo the MMU check for every insn.
-                 */
-                return -1;
-            }
-        }
-        assert(tlb_hit(entry->addr_code, addr));
-    }
-
-    if (unlikely(entry->addr_code & TLB_MMIO)) {
-        /* The region is not backed by RAM.  */
-        if (hostp) {
-            *hostp = NULL;
-        }
-        return -1;
-    }
-
-    p = (void *)((uintptr_t)addr + entry->addend);
-    if (hostp) {
-        *hostp = p;
-    }
-    return qemu_ram_addr_from_host_nofail(p);
-}
-
 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
                            CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
 {
@@ -XXX,XX +XXX,XX @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
     return flags ? NULL : host;
 }
 
+/*
+ * Return a ram_addr_t for the virtual address for execution.
+ *
+ * Return -1 if we can't translate and execute from an entire page
+ * of RAM.  This will force us to execute by loading and translating
+ * one insn at a time, without caching.
+ *
+ * NOTE: This function will trigger an exception if the page is
+ * not executable.
+ */
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
+                                        void **hostp)
+{
+    void *p;
+
+    (void)probe_access_internal(env, addr, 1, MMU_INST_FETCH,
+                                cpu_mmu_index(env, true), false, &p, 0);
+    if (p == NULL) {
+        return -1;
+    }
+    if (hostp) {
+        *hostp = p;
+    }
+    return qemu_ram_addr_from_host_nofail(p);
+}
+
 #ifdef CONFIG_PLUGIN
 /*
  * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
-- 
2.34.1

It was non-obvious to me why we can raise an exception in
the middle of a comparison function, but it works.
While nearby, use TARGET_PAGE_ALIGN instead of open-coding.

Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cpu-exec.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
             tb_page_addr_t phys_page2;
             target_ulong virt_page2;
 
-            virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
+            /*
+             * We know that the first page matched, and an otherwise valid TB
+             * encountered an incomplete instruction at the end of that page,
+             * therefore we know that generating a new TB from the current PC
+             * must also require reading from the next page -- even if the
+             * second pages do not match, and therefore the resulting insn
+             * is different for the new TB.  Therefore any exception raised
+             * here by the faulting lookup is not premature.
+             */
+            virt_page2 = TARGET_PAGE_ALIGN(desc->pc);
             phys_page2 = get_page_addr_code(desc->env, virt_page2);
             if (tb->page_addr[1] == phys_page2) {
                 return true;
-- 
2.34.1

The only user can easily use translator_lduw and
adjust the type to signed during the return.

Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/translator.h   | 1 -
 target/i386/tcg/translate.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/exec/translator.h b/include/exec/translator.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest);
 
 #define FOR_EACH_TRANSLATOR_LD(F)                                       \
     F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */)           \
-    F(translator_ldsw, int16_t, cpu_ldsw_code, bswap16)                 \
     F(translator_lduw, uint16_t, cpu_lduw_code, bswap16)                \
     F(translator_ldl, uint32_t, cpu_ldl_code, bswap32)                  \
     F(translator_ldq, uint64_t, cpu_ldq_code, bswap64)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
 
 static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
 {
-    return translator_ldsw(env, &s->base, advance_pc(env, s, 2));
+    return translator_lduw(env, &s->base, advance_pc(env, s, 2));
 }
 
 static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
-- 
2.34.1

Pass these along to translator_loop -- pc may be used instead
of tb->pc, and host_pc is currently unused.  Adjust all targets
at one time.

Acked-by: Alistair Francis <alistair.francis@wdc.com>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/exec-all.h       |  1 -
 include/exec/translator.h     | 24 ++++++++++++++++++++----
 accel/tcg/translate-all.c     |  6 ++++--
 accel/tcg/translator.c        |  9 +++++----
 target/alpha/translate.c      |  5 +++--
 target/arm/translate.c        |  5 +++--
 target/avr/translate.c        |  5 +++--
 target/cris/translate.c       |  5 +++--
 target/hexagon/translate.c    |  6 ++++--
 target/hppa/translate.c       |  5 +++--
 target/i386/tcg/translate.c   |  5 +++--
 target/loongarch/translate.c  |  6 ++++--
 target/m68k/translate.c       |  5 +++--
 target/microblaze/translate.c |  5 +++--
 target/mips/tcg/translate.c   |  5 +++--
 target/nios2/translate.c      |  5 +++--
 target/openrisc/translate.c   |  6 ++++--
 target/ppc/translate.c        |  5 +++--
 target/riscv/translate.c      |  5 +++--
 target/rx/translate.c         |  5 +++--
 target/s390x/tcg/translate.c  |  5 +++--
 target/sh4/translate.c        |  5 +++--
 target/sparc/translate.c      |  5 +++--
 target/tricore/translate.c    |  6 ++++--
 target/xtensa/translate.c     |  6 ++++--
 25 files changed, 97 insertions(+), 53 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -XXX,XX +XXX,XX @@ typedef ram_addr_t tb_page_addr_t;
 #define TB_PAGE_ADDR_FMT RAM_ADDR_FMT
 #endif
 
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns);
 void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb,
                           target_ulong *data);
 
diff --git a/include/exec/translator.h b/include/exec/translator.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -XXX,XX +XXX,XX @@
 #include "exec/translate-all.h"
 #include "tcg/tcg.h"
 
+/**
+ * gen_intermediate_code
+ * @cpu: cpu context
+ * @tb: translation block
+ * @max_insns: max number of instructions to translate
+ * @pc: guest virtual program counter address
+ * @host_pc: host physical program counter address
+ *
+ * This function must be provided by the target, which should create
+ * the target-specific DisasContext, and then invoke translator_loop.
+ */
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc);
 
 /**
  * DisasJumpType:
@@ -XXX,XX +XXX,XX @@ typedef struct TranslatorOps {
 
 /**
  * translator_loop:
- * @ops: Target-specific operations.
- * @db: Disassembly context.
  * @cpu: Target vCPU.
  * @tb: Translation block.
  * @max_insns: Maximum number of insns to translate.
+ * @pc: guest virtual program counter address
+ * @host_pc: host physical program counter address
+ * @ops: Target-specific operations.
+ * @db: Disassembly context.
  *
  * Generic translator loop.
  *
@@ -XXX,XX +XXX,XX @@ typedef struct TranslatorOps {
  * - When single-stepping is enabled (system-wide or on the current vCPU).
  * - When too many instructions have been translated.
  */
-void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
-                     CPUState *cpu, TranslationBlock *tb, int max_insns);
+void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
+                     target_ulong pc, void *host_pc,
+                     const TranslatorOps *ops, DisasContextBase *db);
 
 void translator_loop_temp_check(DisasContextBase *db);
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@
 
 #include "exec/cputlb.h"
 #include "exec/translate-all.h"
+#include "exec/translator.h"
 #include "qemu/bitmap.h"
 #include "qemu/qemu-print.h"
 #include "qemu/timer.h"
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     TCGProfile *prof = &tcg_ctx->prof;
     int64_t ti;
 #endif
+    void *host_pc;
 
     assert_memory_lock();
     qemu_thread_jit_write();
 
-    phys_pc = get_page_addr_code(env, pc);
+    phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
 
     if (phys_pc == -1) {
         /* Generate a one-shot TB with 1 insn in it */
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tcg_func_start(tcg_ctx);
 
     tcg_ctx->cpu = env_cpu(env);
-    gen_intermediate_code(cpu, tb, max_insns);
+    gen_intermediate_code(cpu, tb, max_insns, pc, host_pc);
     assert(tb->size != 0);
     tcg_ctx->cpu = NULL;
     max_insns = tb->icount;
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -XXX,XX +XXX,XX @@ static inline void translator_page_protect(DisasContextBase *dcbase,
 #endif
 }
 
-void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
-                     CPUState *cpu, TranslationBlock *tb, int max_insns)
+void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
+                     target_ulong pc, void *host_pc,
+                     const TranslatorOps *ops, DisasContextBase *db)
 {
     uint32_t cflags = tb_cflags(tb);
     bool plugin_enabled;
 
     /* Initialize DisasContext */
     db->tb = tb;
-    db->pc_first = tb->pc;
-    db->pc_next = db->pc_first;
+    db->pc_first = pc;
+    db->pc_next = pc;
     db->is_jmp = DISAS_NEXT;
     db->num_insns = 0;
     db->max_insns = max_insns;
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/alpha/translate.c
+++ b/target/alpha/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps alpha_tr_ops = {
     .disas_log          = alpha_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext dc;
-    translator_loop(&alpha_tr_ops, &dc.base, cpu, tb, max_insns);
+    translator_loop(cpu, tb, max_insns, pc, host_pc, &alpha_tr_ops, &dc.base);
 }
 
 void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb,
diff --git a/target/arm/translate.c b/target/arm/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps thumb_translator_ops = {
 };
 
 /* generate intermediate code for basic block 'tb'.  */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext dc = { };
     const TranslatorOps *ops = &arm_translator_ops;
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
     }
 #endif
 
-    translator_loop(ops, &dc.base, cpu, tb, max_insns);
+    translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
 }
 
 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
diff --git a/target/avr/translate.c b/target/avr/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/avr/translate.c
+++ b/target/avr/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps avr_tr_ops = {
     .disas_log          = avr_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext dc = { };
-    translator_loop(&avr_tr_ops, &dc.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc, &avr_tr_ops, &dc.base);
 }
 
 void restore_state_to_opc(CPUAVRState *env, TranslationBlock *tb,
diff --git a/target/cris/translate.c b/target/cris/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/cris/translate.c
+++ b/target/cris/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps cris_tr_ops = {
     .disas_log          = cris_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext dc;
-    translator_loop(&cris_tr_ops, &dc.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc, &cris_tr_ops, &dc.base);
 }
 
 void cris_cpu_dump_state(CPUState *cs, FILE *f, int flags)
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hexagon_tr_ops = {
     .disas_log          = hexagon_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext ctx;
 
-    translator_loop(&hexagon_tr_ops, &ctx.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc,
+                    &hexagon_tr_ops, &ctx.base);
 }
 
 #define NAME_LEN               64
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hppa_tr_ops = {
     .disas_log          = hppa_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext ctx;
-    translator_loop(&hppa_tr_ops, &ctx.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base);
 }
 
 void restore_state_to_opc(CPUHPPAState *env, TranslationBlock *tb,
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps i386_tr_ops = {
 };
 
 /* generate intermediate code for basic block 'tb'.  */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext dc;
 
-    translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
+    translator_loop(cpu, tb, max_insns, pc, host_pc, &i386_tr_ops, &dc.base);
 }
 
 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/translate.c
+++ b/target/loongarch/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps loongarch_tr_ops = {
     .disas_log          = loongarch_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext ctx;
 
-    translator_loop(&loongarch_tr_ops, &ctx.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc,
+                    &loongarch_tr_ops, &ctx.base);
 }
 
 void loongarch_translate_init(void)
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps m68k_tr_ops = {
     .disas_log          = m68k_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext dc;
-    translator_loop(&m68k_tr_ops, &dc.base, cpu, tb, max_insns);
+    translator_loop(cpu, tb, max_insns, pc, host_pc, &m68k_tr_ops, &dc.base);
 }
 
 static double floatx80_to_double(CPUM68KState *env, uint16_t high, uint64_t low)
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/microblaze/translate.c
+++ b/target/microblaze/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mb_tr_ops = {
     .disas_log          = mb_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext dc;
-    translator_loop(&mb_tr_ops, &dc.base, cpu, tb, max_insns);
+    translator_loop(cpu, tb, max_insns, pc, host_pc, &mb_tr_ops, &dc.base);
 }
 
 void mb_cpu_dump_state(CPUState *cs, FILE *f, int flags)
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/mips/tcg/translate.c
+++ b/target/mips/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mips_tr_ops = {
     .disas_log          = mips_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext ctx;
 
-    translator_loop(&mips_tr_ops, &ctx.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc, &mips_tr_ops, &ctx.base);
 }
 
 void mips_tcg_init(void)
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/nios2/translate.c
+++ b/target/nios2/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps nios2_tr_ops = {
     .disas_log          = nios2_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext dc;
-    translator_loop(&nios2_tr_ops, &dc.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc, &nios2_tr_ops, &dc.base);
 }
 
 void nios2_cpu_dump_state(CPUState *cs, FILE *f, int flags)
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/openrisc/translate.c
+++ b/target/openrisc/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps openrisc_tr_ops = {
     .disas_log          = openrisc_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext ctx;
 
-    translator_loop(&openrisc_tr_ops, &ctx.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc,
+                    &openrisc_tr_ops, &ctx.base);
 }
 
 void openrisc_cpu_dump_state(CPUState *cs, FILE *f, int flags)
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps ppc_tr_ops = {
     .disas_log          = ppc_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext ctx;
 
-    translator_loop(&ppc_tr_ops, &ctx.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc, &ppc_tr_ops, &ctx.base);
 }
 
 void restore_state_to_opc(CPUPPCState *env, TranslationBlock *tb,
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps riscv_tr_ops = {
     .disas_log          = riscv_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext ctx;
 
-    translator_loop(&riscv_tr_ops, &ctx.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc, &riscv_tr_ops, &ctx.base);
 }
 
 void riscv_translate_init(void)
diff --git a/target/rx/translate.c b/target/rx/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/rx/translate.c
+++ b/target/rx/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps rx_tr_ops = {
     .disas_log          = rx_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext dc;
 
-    translator_loop(&rx_tr_ops, &dc.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc, &rx_tr_ops, &dc.base);
 }
 
 void restore_state_to_opc(CPURXState *env, TranslationBlock *tb,
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps s390x_tr_ops = {
     .disas_log          = s390x_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext dc;
 
-    translator_loop(&s390x_tr_ops, &dc.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc, &s390x_tr_ops, &dc.base);
 }
 
 void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb,
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sh4_tr_ops = {
     .disas_log          = sh4_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext ctx;
 
-    translator_loop(&sh4_tr_ops, &ctx.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc, &sh4_tr_ops, &ctx.base);
 }
 
 void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb,
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sparc_tr_ops = {
     .disas_log          = sparc_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext dc = {};
 
-    translator_loop(&sparc_tr_ops, &dc.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc, &sparc_tr_ops, &dc.base);
 }
 
 void sparc_tcg_init(void)
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/translate.c
+++ b/target/tricore/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps tricore_tr_ops = {
 };
 
 
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext ctx;
-    translator_loop(&tricore_tr_ops, &ctx.base, cs, tb, max_insns);
+    translator_loop(cs, tb, max_insns, pc, host_pc,
+                    &tricore_tr_ops, &ctx.base);
 }
 
 void
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps xtensa_translator_ops = {
     .disas_log          = xtensa_tr_disas_log,
 };
 
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
+                           target_ulong pc, void *host_pc)
 {
     DisasContext dc = {};
-    translator_loop(&xtensa_translator_ops, &dc.base, cpu, tb, max_insns);
+    translator_loop(cpu, tb, max_insns, pc, host_pc,
+                    &xtensa_translator_ops, &dc.base);
 }
 
 void xtensa_cpu_dump_state(CPUState *cs, FILE *f, int flags)
-- 
2.34.1

Cache the translation from guest to host address, so we may
use direct loads when we hit on the primary translation page.

Look up the second translation page only once, during translation.
This obviates another lookup of the second page within tb_gen_code
after translation.

Fixes a bug in that plugin_insn_append should be passed the bytes
in the original memory order, not bswapped by pieces.

Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/translator.h |  63 +++++++++++--------
 accel/tcg/translate-all.c |  23 +++----
 accel/tcg/translator.c    | 126 +++++++++++++++++++++++++++++---------
 3 files changed, 141 insertions(+), 71 deletions(-)

diff --git a/include/exec/translator.h b/include/exec/translator.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -XXX,XX +XXX,XX @@ typedef enum DisasJumpType {
  * Architecture-agnostic disassembly context.
  */
 typedef struct DisasContextBase {
-    const TranslationBlock *tb;
+    TranslationBlock *tb;
     target_ulong pc_first;
     target_ulong pc_next;
     DisasJumpType is_jmp;
     int num_insns;
     int max_insns;
     bool singlestep_enabled;
-#ifdef CONFIG_USER_ONLY
-    /*
-     * Guest address of the last byte of the last protected page.
-     *
-     * Pages containing the translated instructions are made non-writable in
-     * order to achieve consistency in case another thread is modifying the
-     * code while translate_insn() fetches the instruction bytes piecemeal.
-     * Such writer threads are blocked on mmap_lock() in page_unprotect().
-     */
-    target_ulong page_protect_end;
-#endif
+    void *host_addr[2];
 } DisasContextBase;
 
 /**
@@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest);
  * the relevant information at translation time.
  */
 
-#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn)             \
-    type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \
-                           abi_ptr pc, bool do_swap);                   \
-    static inline type fullname(CPUArchState *env,                      \
-                                DisasContextBase *dcbase, abi_ptr pc)   \
-    {                                                                   \
-        return fullname ## _swap(env, dcbase, pc, false);               \
+uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
+uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
+uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
+uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
+
+static inline uint16_t
+translator_lduw_swap(CPUArchState *env, DisasContextBase *db,
+                     abi_ptr pc, bool do_swap)
+{
+    uint16_t ret = translator_lduw(env, db, pc);
+    if (do_swap) {
+        ret = bswap16(ret);
     }
+    return ret;
+}
 
-#define FOR_EACH_TRANSLATOR_LD(F)                                       \
-    F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */)           \
-    F(translator_lduw, uint16_t, cpu_lduw_code, bswap16)                \
-    F(translator_ldl, uint32_t, cpu_ldl_code, bswap32)                  \
-    F(translator_ldq, uint64_t, cpu_ldq_code, bswap64)
+static inline uint32_t
+translator_ldl_swap(CPUArchState *env, DisasContextBase *db,
+                    abi_ptr pc, bool do_swap)
+{
+    uint32_t ret = translator_ldl(env, db, pc);
+    if (do_swap) {
+        ret = bswap32(ret);
+    }
+    return ret;
+}
 
-FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
-
-#undef GEN_TRANSLATOR_LD
+static inline uint64_t
+translator_ldq_swap(CPUArchState *env, DisasContextBase *db,
+                    abi_ptr pc, bool do_swap)
+{
+    uint64_t ret = translator_ldq(env, db, pc);
+    if (do_swap) {
+        ret = bswap64(ret);
+    }
+    return ret;
+}
 
 /*
  * Return whether addr is on the same page as where disassembly started.
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 {
     CPUArchState *env = cpu->env_ptr;
     TranslationBlock *tb, *existing_tb;
-    tb_page_addr_t phys_pc, phys_page2;
-    target_ulong virt_page2;
+    tb_page_addr_t phys_pc;
     tcg_insn_unit *gen_code_buf;
     int gen_code_size, search_size, max_insns;
 #ifdef CONFIG_PROFILER
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tb->flags = flags;
     tb->cflags = cflags;
     tb->trace_vcpu_dstate = *cpu->trace_dstate;
+    tb->page_addr[0] = phys_pc;
+    tb->page_addr[1] = -1;
     tcg_ctx->tb_cflags = cflags;
  tb_overflow:
 
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     }
 
     /*
-     * If the TB is not associated with a physical RAM page then
-     * it must be a temporary one-insn TB, and we have nothing to do
-     * except fill in the page_addr[] fields. Return early before
-     * attempting to link to other TBs or add to the lookup table.
+     * If the TB is not associated with a physical RAM page then it must be
+     * a temporary one-insn TB, and we have nothing left to do. Return early
+     * before attempting to link to other TBs or add to the lookup table.
      */
-    if (phys_pc == -1) {
-        tb->page_addr[0] = tb->page_addr[1] = -1;
+    if (tb->page_addr[0] == -1) {
         return tb;
     }
 
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
      */
     tcg_tb_insert(tb);
 
-    /* check next page if needed */
-    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
-    phys_page2 = -1;
-    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
-        phys_page2 = get_page_addr_code(env, virt_page2);
-    }
     /*
      * No explicit memory barrier is required -- tb_link_page() makes the
      * TB visible in a consistent state.
      */
-    existing_tb = tb_link_page(tb, phys_pc, phys_page2);
+    existing_tb = tb_link_page(tb, tb->page_addr[0], tb->page_addr[1]);
     /* if the TB already exists, discard what we just translated */
     if (unlikely(existing_tb != tb)) {
         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
     return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
 }
 
-static inline void translator_page_protect(DisasContextBase *dcbase,
-                                           target_ulong pc)
-{
-#ifdef CONFIG_USER_ONLY
-    dcbase->page_protect_end = pc | ~TARGET_PAGE_MASK;
-    page_protect(pc);
-#endif
-}
-
 void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
                      target_ulong pc, void *host_pc,
                      const TranslatorOps *ops, DisasContextBase *db)
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
     db->num_insns = 0;
     db->max_insns = max_insns;
     db->singlestep_enabled = cflags & CF_SINGLE_STEP;
-    translator_page_protect(db, db->pc_next);
+    db->host_addr[0] = host_pc;
+    db->host_addr[1] = NULL;
+
+#ifdef CONFIG_USER_ONLY
+    page_protect(pc);
+#endif
 
     ops->init_disas_context(db, cpu);
     tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
 #endif
 }
 
-static inline void translator_maybe_page_protect(DisasContextBase *dcbase,
-                                                 target_ulong pc, size_t len)
+static void *translator_access(CPUArchState *env, DisasContextBase *db,
+                               target_ulong pc, size_t len)
 {
-#ifdef CONFIG_USER_ONLY
-    target_ulong end = pc + len - 1;
+    void *host;
+    target_ulong base, end;
+    TranslationBlock *tb;
 
-    if (end > dcbase->page_protect_end) {
-        translator_page_protect(dcbase, end);
+    tb = db->tb;
+
+    /* Use slow path if first page is MMIO. */
+    if (unlikely(tb->page_addr[0] == -1)) {
+        return NULL;
     }
+
+    end = pc + len - 1;
+    if (likely(is_same_page(db, end))) {
+        host = db->host_addr[0];
+        base = db->pc_first;
+    } else {
+        host = db->host_addr[1];
+        base = TARGET_PAGE_ALIGN(db->pc_first);
+        if (host == NULL) {
+            tb->page_addr[1] =
+                get_page_addr_code_hostp(env, base, &db->host_addr[1]);
+#ifdef CONFIG_USER_ONLY
+            page_protect(end);
 #endif
+            /* We cannot handle MMIO as second page. */
+            assert(tb->page_addr[1] != -1);
+            host = db->host_addr[1];
+        }
+
+        /* Use slow path when crossing pages. */
+        if (is_same_page(db, pc)) {
+            return NULL;
+        }
+    }
+
+    tcg_debug_assert(pc >= base);
+    return host + (pc - base);
 }
 
-#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn)             \
-    type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \
-                           abi_ptr pc, bool do_swap)                    \
-    {                                                                   \
-        translator_maybe_page_protect(dcbase, pc, sizeof(type));        \
-        type ret = load_fn(env, pc);                                    \
-        if (do_swap) {                                                  \
-            ret = swap_fn(ret);                                         \
-        }                                                               \
-        plugin_insn_append(pc, &ret, sizeof(ret));                      \
-        return ret;                                                     \
+uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
+{
+    uint8_t ret;
+    void *p = translator_access(env, db, pc, sizeof(ret));
+
+    if (p) {
+        plugin_insn_append(pc, p, sizeof(ret));
+        return ldub_p(p);
     }
+    ret = cpu_ldub_code(env, pc);
+    plugin_insn_append(pc, &ret, sizeof(ret));
+    return ret;
+}
 
-FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
+uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
+{
+    uint16_t ret, plug;
+    void *p = translator_access(env, db, pc, sizeof(ret));
 
-#undef GEN_TRANSLATOR_LD
+    if (p) {
+        plugin_insn_append(pc, p, sizeof(ret));
+        return lduw_p(p);
+    }
+    ret = cpu_lduw_code(env, pc);
+    plug = tswap16(ret);
+    plugin_insn_append(pc, &plug, sizeof(ret));
+    return ret;
+}
+
+uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
+{
+    uint32_t ret, plug;
+    void *p = translator_access(env, db, pc, sizeof(ret));
+
+    if (p) {
+        plugin_insn_append(pc, p, sizeof(ret));
+        return ldl_p(p);
+    }
+    ret = cpu_ldl_code(env, pc);
+    plug = tswap32(ret);
+    plugin_insn_append(pc, &plug, sizeof(ret));
+    return ret;
+}
+
+uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
+{
+    uint64_t ret, plug;
+    void *p = translator_access(env, db, pc, sizeof(ret));
+
+    if (p) {
+        plugin_insn_append(pc, p, sizeof(ret));
+        return ldq_p(p);
+    }
+    ret = cpu_ldq_code(env, pc);
+    plug = tswap64(ret);
+    plugin_insn_append(pc, &plug, sizeof(ret));
+    return ret;
+}
-- 
2.34.1

From: Ilya Leoshkevich <iii@linux.ibm.com>

Right now the translator stops right *after* the end of a page, which
breaks reporting of fault locations when the last instruction of a
multi-insn translation block crosses a page boundary.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220817150506.592862-3-iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/s390x/tcg/translate.c     |  15 +++-
 tests/tcg/s390x/noexec.c         | 106 +++++++++++++++++++++++
 tests/tcg/multiarch/noexec.c.inc | 139 +++++++++++++++++++++++++++++++
 tests/tcg/s390x/Makefile.target  |   1 +
 4 files changed, 257 insertions(+), 4 deletions(-)
 create mode 100644 tests/tcg/s390x/noexec.c
 create mode 100644 tests/tcg/multiarch/noexec.c.inc

diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ static void s390x_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
     dc->insn_start = tcg_last_op();
 }
 
+static target_ulong get_next_pc(CPUS390XState *env, DisasContext *s,
+                                uint64_t pc)
+{
+    uint64_t insn = ld_code2(env, s, pc);
+
+    return pc + get_ilen((insn >> 8) & 0xff);
+}
+
 static void s390x_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
 {
     CPUS390XState *env = cs->env_ptr;
@@ -XXX,XX +XXX,XX @@ static void s390x_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
 
     dc->base.is_jmp = translate_one(env, dc);
     if (dc->base.is_jmp == DISAS_NEXT) {
-        uint64_t page_start;
-
-        page_start = dc->base.pc_first & TARGET_PAGE_MASK;
-        if (dc->base.pc_next - page_start >= TARGET_PAGE_SIZE || dc->ex_value) {
+        if (!is_same_page(dcbase, dc->base.pc_next) ||
+            !is_same_page(dcbase, get_next_pc(env, dc, dc->base.pc_next)) ||
+            dc->ex_value) {
             dc->base.is_jmp = DISAS_TOO_MANY;
         }
     }
diff --git a/tests/tcg/s390x/noexec.c b/tests/tcg/s390x/noexec.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/tcg/s390x/noexec.c
@@ -XXX,XX +XXX,XX @@
+#include "../multiarch/noexec.c.inc"
+
+static void *arch_mcontext_pc(const mcontext_t *ctx)
+{
+    return (void *)ctx->psw.addr;
+}
+
+static int arch_mcontext_arg(const mcontext_t *ctx)
+{
+    return ctx->gregs[2];
+}
+
+static void arch_flush(void *p, int len)
+{
+}
+
+extern char noexec_1[];
+extern char noexec_2[];
+extern char noexec_end[];
+
+asm("noexec_1:\n"
+    "   lgfi %r2,1\n"       /* %r2 is 0 on entry, set 1. */
+    "noexec_2:\n"
+    "   lgfi %r2,2\n"       /* %r2 is 0/1; set 2. */
+    "   br %r14\n"          /* return */
+    "noexec_end:");
+
+extern char exrl_1[];
+extern char exrl_2[];
+extern char exrl_end[];
+
+asm("exrl_1:\n"
+    "   exrl %r0, exrl_2\n"
+    "   br %r14\n"
+    "exrl_2:\n"
+    "   lgfi %r2,2\n"
+    "exrl_end:");
+
+int main(void)
+{
+    struct noexec_test noexec_tests[] = {
+        {
+            .name = "fallthrough",
+            .test_code = noexec_1,
+            .test_len = noexec_end - noexec_1,
+            .page_ofs = noexec_1 - noexec_2,
+            .entry_ofs = noexec_1 - noexec_2,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = 0,
+            .expected_arg = 1,
+        },
+        {
+            .name = "jump",
+            .test_code = noexec_1,
+            .test_len = noexec_end - noexec_1,
+            .page_ofs = noexec_1 - noexec_2,
+            .entry_ofs = 0,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = 0,
+            .expected_arg = 0,
+        },
+        {
+            .name = "exrl",
+            .test_code = exrl_1,
+            .test_len = exrl_end - exrl_1,
+            .page_ofs = exrl_1 - exrl_2,
+            .entry_ofs = exrl_1 - exrl_2,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = exrl_1 - exrl_2,
+            .expected_arg = 0,
+        },
+        {
+            .name = "fallthrough [cross]",
+            .test_code = noexec_1,
+            .test_len = noexec_end - noexec_1,
+            .page_ofs = noexec_1 - noexec_2 - 2,
+            .entry_ofs = noexec_1 - noexec_2 - 2,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = -2,
+            .expected_arg = 1,
+        },
+        {
+            .name = "jump [cross]",
+            .test_code = noexec_1,
+            .test_len = noexec_end - noexec_1,
+            .page_ofs = noexec_1 - noexec_2 - 2,
+            .entry_ofs = -2,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = -2,
+            .expected_arg = 0,
+        },
+        {
+            .name = "exrl [cross]",
+            .test_code = exrl_1,
+            .test_len = exrl_end - exrl_1,
+            .page_ofs = exrl_1 - exrl_2 - 2,
+            .entry_ofs = exrl_1 - exrl_2 - 2,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = exrl_1 - exrl_2 - 2,
+            .expected_arg = 0,
+        },
+    };
+
+    return test_noexec(noexec_tests,
+                       sizeof(noexec_tests) / sizeof(noexec_tests[0]));
+}
diff --git a/tests/tcg/multiarch/noexec.c.inc b/tests/tcg/multiarch/noexec.c.inc
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/tcg/multiarch/noexec.c.inc
@@ -XXX,XX +XXX,XX @@
+/*
+ * Common code for arch-specific MMU_INST_FETCH fault testing.
+ */
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/ucontext.h>
+
+/* Forward declarations. */
+
+static void *arch_mcontext_pc(const mcontext_t *ctx);
+static int arch_mcontext_arg(const mcontext_t *ctx);
+static void arch_flush(void *p, int len);
+
+/* Testing infrastructure. */
+
+struct noexec_test {
+    const char *name;
+    const char *test_code;
+    int test_len;
+    int page_ofs;
+    int entry_ofs;
+    int expected_si_ofs;
+    int expected_pc_ofs;
+    int expected_arg;
+};
+
+static void *page_base;
+static int page_size;
+static const struct noexec_test *current_noexec_test;
+
+static void handle_err(const char *syscall)
+{
+    printf("[  FAILED  ] %s: %s\n", syscall, strerror(errno));
+    exit(EXIT_FAILURE);
+}
+
+static void handle_segv(int sig, siginfo_t *info, void *ucontext)
+{
+    const struct noexec_test *test = current_noexec_test;
+    const mcontext_t *mc = &((ucontext_t *)ucontext)->uc_mcontext;
+    void *expected_si;
+    void *expected_pc;
+    void *pc;
+    int arg;
+
+    if (test == NULL) {
+        printf("[  FAILED  ] unexpected SEGV\n");
+        exit(EXIT_FAILURE);
+    }
+    current_noexec_test = NULL;
+
+    expected_si = page_base + test->expected_si_ofs;
+    if (info->si_addr != expected_si) {
+        printf("[  FAILED  ] wrong si_addr (%p != %p)\n",
+               info->si_addr, expected_si);
+        exit(EXIT_FAILURE);
+    }
+
+    pc = arch_mcontext_pc(mc);
+    expected_pc = page_base + test->expected_pc_ofs;
+    if (pc != expected_pc) {
+        printf("[  FAILED  ] wrong pc (%p != %p)\n", pc, expected_pc);
+        exit(EXIT_FAILURE);
+    }
+
+    arg = arch_mcontext_arg(mc);
+    if (arg != test->expected_arg) {
+        printf("[  FAILED  ] wrong arg (%d != %d)\n", arg, test->expected_arg);
+        exit(EXIT_FAILURE);
+    }
+
+    if (mprotect(page_base, page_size,
+                 PROT_READ | PROT_WRITE | PROT_EXEC) < 0) {
+        handle_err("mprotect");
+    }
+}
+
+static void test_noexec_1(const struct noexec_test *test)
+{
+    void *start = page_base + test->page_ofs;
+    void (*fn)(int arg) = page_base + test->entry_ofs;
+
+    memcpy(start, test->test_code, test->test_len);
+    arch_flush(start, test->test_len);
+
+    /* Trigger TB creation in order to test invalidation. */
+    fn(0);
+
+    if (mprotect(page_base, page_size, PROT_NONE) < 0) {
+        handle_err("mprotect");
+    }
+
+    /* Trigger SEGV and check that handle_segv() ran. */
+    current_noexec_test = test;
+    fn(0);
+    assert(current_noexec_test == NULL);
+}
+
+static int test_noexec(struct noexec_test *tests, size_t n_tests)
+{
+    struct sigaction act;
+    size_t i;
+
+    memset(&act, 0, sizeof(act));
+    act.sa_sigaction = handle_segv;
+    act.sa_flags = SA_SIGINFO;
+    if (sigaction(SIGSEGV, &act, NULL) < 0) {
+        handle_err("sigaction");
+    }
+
+    page_size = getpagesize();
+    page_base = mmap(NULL, 2 * page_size,
+                     PROT_READ | PROT_WRITE | PROT_EXEC,
+                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+    if (page_base == MAP_FAILED) {
+        handle_err("mmap");
+    }
+    page_base += page_size;
+
+    for (i = 0; i < n_tests; i++) {
+        struct noexec_test *test = &tests[i];
+
+        printf("[ RUN      ] %s\n", test->name);
+        test_noexec_1(test);
+        printf("[       OK ]\n");
+    }
+
+    printf("[  PASSED  ]\n");
+    return EXIT_SUCCESS;
+}
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index XXXXXXX..XXXXXXX 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -XXX,XX +XXX,XX @@ TESTS+=shift
 TESTS+=trap
 TESTS+=signals-s390x
 TESTS+=branch-relative-long
+TESTS+=noexec
 
 Z14_TESTS=vfminmax
 vfminmax: LDFLAGS+=-lm
-- 
2.34.1

From: Ilya Leoshkevich <iii@linux.ibm.com>

Right now the translator stops right *after* the end of a page, which
breaks reporting of fault locations when the last instruction of a
multi-insn translation block crosses a page boundary.

An implementation, like the one arm and s390x have, would require an
i386 length disassembler, which is burdensome to maintain. Another
alternative would be to single-step at the end of a guest page, but
this may come with a performance impact.

Fix by snapshotting disassembly state and restoring it after we figure
out we crossed a page boundary. This includes rolling back cc_op
updates and emitted ops.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1143
Message-Id: <20220817150506.592862-4-iii@linux.ibm.com>
[rth: Simplify end-of-insn cross-page checks.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/i386/tcg/translate.c      | 64 ++++++++++++++++-----------
 tests/tcg/x86_64/noexec.c        | 75 ++++++++++++++++++++++++++++++++
 tests/tcg/x86_64/Makefile.target |  3 +-
 3 files changed, 116 insertions(+), 26 deletions(-)
 create mode 100644 tests/tcg/x86_64/noexec.c

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
     TCGv_i64 tmp1_i64;
 
     sigjmp_buf jmpbuf;
+    TCGOp *prev_insn_end;
 } DisasContext;
 
 /* The environment in which user-only runs is constrained. */
@@ -XXX,XX +XXX,XX @@ static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
 {
     uint64_t pc = s->pc;
 
+    /* This is a subsequent insn that crosses a page boundary.  */
+    if (s->base.num_insns > 1 &&
+        !is_same_page(&s->base, s->pc + num_bytes - 1)) {
+        siglongjmp(s->jmpbuf, 2);
+    }
+
     s->pc += num_bytes;
     if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
         /* If the instruction's 16th byte is on a different page than the 1st, a
@@ -XXX,XX +XXX,XX @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     int modrm, reg, rm, mod, op, opreg, val;
     target_ulong next_eip, tval;
     target_ulong pc_start = s->base.pc_next;
+    bool orig_cc_op_dirty = s->cc_op_dirty;
+    CCOp orig_cc_op = s->cc_op;
 
     s->pc_start = s->pc = pc_start;
     s->override = -1;
@@ -XXX,XX +XXX,XX @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     s->rip_offset = 0; /* for relative ip address */
     s->vex_l = 0;
     s->vex_v = 0;
-    if (sigsetjmp(s->jmpbuf, 0) != 0) {
+    switch (sigsetjmp(s->jmpbuf, 0)) {
+    case 0:
+        break;
+    case 1:
         gen_exception_gpf(s);
         return s->pc;
+    case 2:
+        /* Restore state that may affect the next instruction. */
+        s->cc_op_dirty = orig_cc_op_dirty;
+        s->cc_op = orig_cc_op;
+        s->base.num_insns--;
+        tcg_remove_ops_after(s->prev_insn_end);
+        s->base.is_jmp = DISAS_TOO_MANY;
+        return pc_start;
+    default:
+        g_assert_not_reached();
     }
 
     prefixes = 0;
@@ -XXX,XX +XXX,XX @@ static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
 {
     DisasContext *dc = container_of(dcbase, DisasContext, base);
 
+    dc->prev_insn_end = tcg_last_op();
     tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
 }
 
@@ -XXX,XX +XXX,XX @@ static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
 #endif
 
     pc_next = disas_insn(dc, cpu);
-
-    if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
-        /* if single step mode, we generate only one instruction and
-           generate an exception */
-        /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
-           the flag and abort the translation to give the irqs a
-           chance to happen */
-        dc->base.is_jmp = DISAS_TOO_MANY;
-    } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
-               && ((pc_next & TARGET_PAGE_MASK)
-                   != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
-                       & TARGET_PAGE_MASK)
-                   || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
-        /* Do not cross the boundary of the pages in icount mode,
-           it can cause an exception. Do it only when boundary is
-           crossed by the first instruction in the block.
-           If current instruction already crossed the bound - it's ok,
-           because an exception hasn't stopped this code.
-         */
-        dc->base.is_jmp = DISAS_TOO_MANY;
-    } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
-        dc->base.is_jmp = DISAS_TOO_MANY;
-    }
-
     dc->base.pc_next = pc_next;
+
+    if (dc->base.is_jmp == DISAS_NEXT) {
+        if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
+            /*
+             * If single step mode, we generate only one instruction and
+             * generate an exception.
+             * If irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
+             * the flag and abort the translation to give the irqs a
+             * chance to happen.
+             */
+            dc->base.is_jmp = DISAS_TOO_MANY;
+        } else if (!is_same_page(&dc->base, pc_next)) {
+            dc->base.is_jmp = DISAS_TOO_MANY;
+        }
+    }
 }
 
 static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
diff --git a/tests/tcg/x86_64/noexec.c b/tests/tcg/x86_64/noexec.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/tcg/x86_64/noexec.c
@@ -XXX,XX +XXX,XX @@
+#include "../multiarch/noexec.c.inc"
+
+static void *arch_mcontext_pc(const mcontext_t *ctx)
+{
+    return (void *)ctx->gregs[REG_RIP];
+}
+
+int arch_mcontext_arg(const mcontext_t *ctx)
+{
+    return ctx->gregs[REG_RDI];
+}
+
+static void arch_flush(void *p, int len)
+{
+}
+
+extern char noexec_1[];
+extern char noexec_2[];
+extern char noexec_end[];
+
+asm("noexec_1:\n"
+    "    movq $1,%rdi\n"    /* %rdi is 0 on entry, set 1. */
+    "noexec_2:\n"
+    "    movq $2,%rdi\n"    /* %rdi is 0/1; set 2. */
+    "    ret\n"
+    "noexec_end:");
+
+int main(void)
+{
+    struct noexec_test noexec_tests[] = {
+        {
+            .name = "fallthrough",
+            .test_code = noexec_1,
+            .test_len = noexec_end - noexec_1,
+            .page_ofs = noexec_1 - noexec_2,
+            .entry_ofs = noexec_1 - noexec_2,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = 0,
+            .expected_arg = 1,
+        },
+        {
+            .name = "jump",
+            .test_code = noexec_1,
+            .test_len = noexec_end - noexec_1,
+            .page_ofs = noexec_1 - noexec_2,
+            .entry_ofs = 0,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = 0,
+            .expected_arg = 0,
+        },
+        {
+            .name = "fallthrough [cross]",
+            .test_code = noexec_1,
+            .test_len = noexec_end - noexec_1,
+            .page_ofs = noexec_1 - noexec_2 - 2,
+            .entry_ofs = noexec_1 - noexec_2 - 2,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = -2,
+            .expected_arg = 1,
+        },
+        {
+            .name = "jump [cross]",
+            .test_code = noexec_1,
+            .test_len = noexec_end - noexec_1,
+            .page_ofs = noexec_1 - noexec_2 - 2,
+            .entry_ofs = -2,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = -2,
+            .expected_arg = 0,
+        },
+    };
+
+    return test_noexec(noexec_tests,
+                       sizeof(noexec_tests) / sizeof(noexec_tests[0]));
+}
diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
index XXXXXXX..XXXXXXX 100644
--- a/tests/tcg/x86_64/Makefile.target
+++ b/tests/tcg/x86_64/Makefile.target
@@ -XXX,XX +XXX,XX @@ include $(SRC_PATH)/tests/tcg/i386/Makefile.target
 
 ifeq ($(filter %-linux-user, $(TARGET)),$(TARGET))
 X86_64_TESTS += vsyscall
+X86_64_TESTS += noexec
 TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
 else
 TESTS=$(MULTIARCH_TESTS)
@@ -XXX,XX +XXX,XX @@ test-x86_64: LDFLAGS+=-lm -lc
 test-x86_64: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h
 	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
 
-vsyscall: $(SRC_PATH)/tests/tcg/x86_64/vsyscall.c
+%: $(SRC_PATH)/tests/tcg/x86_64/%.c
 	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
-- 
2.34.1

These will be useful in properly ending the TB.

Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/riscv/translate.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -XXX,XX +XXX,XX @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
 /* Include decoders for factored-out extensions */
 #include "decode-XVentanaCondOps.c.inc"
 
+/* The specification allows for longer insns, but not supported by qemu. */
+#define MAX_INSN_LEN  4
+
+static inline int insn_len(uint16_t first_word)
+{
+    return (first_word & 3) == 3 ? 4 : 2;
+}
+
 static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
 {
     /*
@@ -XXX,XX +XXX,XX @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
     };
 
     /* Check for compressed insn */
-    if (extract16(opcode, 0, 2) != 3) {
+    if (insn_len(opcode) == 2) {
         if (!has_ext(ctx, RVC)) {
             gen_exception_illegal(ctx);
         } else {
-- 
2.34.1

Right now the translator stops right *after* the end of a page, which
breaks reporting of fault locations when the last instruction of a
multi-insn translation block crosses a page boundary.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1155
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/riscv/translate.c          | 17 +++++--
 tests/tcg/riscv64/noexec.c        | 79 +++++++++++++++++++++++++++++++
 tests/tcg/riscv64/Makefile.target |  1 +
 3 files changed, 93 insertions(+), 4 deletions(-)
 create mode 100644 tests/tcg/riscv64/noexec.c

diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -XXX,XX +XXX,XX @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
     }
     ctx->nftemp = 0;
 
+    /* Only the first insn within a TB is allowed to cross a page boundary. */
     if (ctx->base.is_jmp == DISAS_NEXT) {
-        target_ulong page_start;
-
-        page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
-        if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE) {
+        if (!is_same_page(&ctx->base, ctx->base.pc_next)) {
             ctx->base.is_jmp = DISAS_TOO_MANY;
+        } else {
+            unsigned page_ofs = ctx->base.pc_next & ~TARGET_PAGE_MASK;
+
+            if (page_ofs > TARGET_PAGE_SIZE - MAX_INSN_LEN) {
+                uint16_t next_insn = cpu_lduw_code(env, ctx->base.pc_next);
+                int len = insn_len(next_insn);
+
+                if (!is_same_page(&ctx->base, ctx->base.pc_next + len)) {
+                    ctx->base.is_jmp = DISAS_TOO_MANY;
+                }
+            }
         }
     }
 }
diff --git a/tests/tcg/riscv64/noexec.c b/tests/tcg/riscv64/noexec.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/tcg/riscv64/noexec.c
@@ -XXX,XX +XXX,XX @@
+#include "../multiarch/noexec.c.inc"
+
+static void *arch_mcontext_pc(const mcontext_t *ctx)
+{
+    return (void *)ctx->__gregs[REG_PC];
+}
+
+static int arch_mcontext_arg(const mcontext_t *ctx)
+{
+    return ctx->__gregs[REG_A0];
+}
+
+static void arch_flush(void *p, int len)
+{
+    __builtin___clear_cache(p, p + len);
+}
+
+extern char noexec_1[];
+extern char noexec_2[];
+extern char noexec_end[];
+
+asm(".option push\n"
+    ".option norvc\n"
+    "noexec_1:\n"
+    "   li a0,1\n"       /* a0 is 0 on entry, set 1. */
+    "noexec_2:\n"
+    "   li a0,2\n"      /* a0 is 0/1; set 2. */
+    "   ret\n"
+    "noexec_end:\n"
+    ".option pop");
+
+int main(void)
+{
+    struct noexec_test noexec_tests[] = {
+        {
+            .name = "fallthrough",
+            .test_code = noexec_1,
+            .test_len = noexec_end - noexec_1,
+            .page_ofs = noexec_1 - noexec_2,
+            .entry_ofs = noexec_1 - noexec_2,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = 0,
+            .expected_arg = 1,
+        },
+        {
+            .name = "jump",
+            .test_code = noexec_1,
+            .test_len = noexec_end - noexec_1,
+            .page_ofs = noexec_1 - noexec_2,
+            .entry_ofs = 0,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = 0,
+            .expected_arg = 0,
+        },
+        {
+            .name = "fallthrough [cross]",
+            .test_code = noexec_1,
+            .test_len = noexec_end - noexec_1,
+            .page_ofs = noexec_1 - noexec_2 - 2,
+            .entry_ofs = noexec_1 - noexec_2 - 2,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = -2,
+            .expected_arg = 1,
+        },
+        {
+            .name = "jump [cross]",
+            .test_code = noexec_1,
+            .test_len = noexec_end - noexec_1,
+            .page_ofs = noexec_1 - noexec_2 - 2,
+            .entry_ofs = -2,
+            .expected_si_ofs = 0,
+            .expected_pc_ofs = -2,
+            .expected_arg = 0,
+        },
+    };
+
+    return test_noexec(noexec_tests,
+                       sizeof(noexec_tests) / sizeof(noexec_tests[0]));
+}
diff --git a/tests/tcg/riscv64/Makefile.target b/tests/tcg/riscv64/Makefile.target
index XXXXXXX..XXXXXXX 100644
--- a/tests/tcg/riscv64/Makefile.target
+++ b/tests/tcg/riscv64/Makefile.target
@@ -XXX,XX +XXX,XX @@
 
 VPATH += $(SRC_PATH)/tests/tcg/riscv64
 TESTS += test-div
+TESTS += noexec
-- 
2.34.1

TCG patch queue, plus one target/sh4 patch that
Yoshinori Sato asked me to process.

The following changes since commit efbf38d73e5dcc4d5f8b98c6e7a12be1f3b91745:

Merge tag 'for-upstream' of git://repo.or.cz/qemu/kevin into staging (2022-10-03 15:06:07 -0400)

are available in the Git repository at:

https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20221004

for you to fetch changes up to ab419fd8a035a65942de4e63effcd55ccbf1a9fe:

target/sh4: Fix TB_FLAG_UNALIGN (2022-10-04 12:33:05 -0700)

----------------------------------------------------------------
Cache CPUClass for use in hot code paths.
Add CPUTLBEntryFull, probe_access_full, tlb_set_page_full.
Add generic support for TARGET_TB_PCREL.
tcg/ppc: Optimize 26-bit jumps using STQ for POWER 2.07
target/sh4: Fix TB_FLAG_UNALIGN

----------------------------------------------------------------
Alex Bennée (3):
      cpu: cache CPUClass in CPUState for hot code paths
      hw/core/cpu-sysemu: used cached class in cpu_asidx_from_attrs
      cputlb: used cached CPUClass in our hot-paths

Leandro Lupori (1):
      tcg/ppc: Optimize 26-bit jumps

Richard Henderson (16):
      accel/tcg: Rename CPUIOTLBEntry to CPUTLBEntryFull
      accel/tcg: Drop addr member from SavedIOTLB
      accel/tcg: Suppress auto-invalidate in probe_access_internal
      accel/tcg: Introduce probe_access_full
      accel/tcg: Introduce tlb_set_page_full
      include/exec: Introduce TARGET_PAGE_ENTRY_EXTRA
      accel/tcg: Remove PageDesc code_bitmap
      accel/tcg: Use bool for page_find_alloc
      accel/tcg: Use DisasContextBase in plugin_gen_tb_start
      accel/tcg: Do not align tb->page_addr[0]
      accel/tcg: Inline tb_flush_jmp_cache
      include/hw/core: Create struct CPUJumpCache
      hw/core: Add CPUClass.get_pc
      accel/tcg: Introduce tb_pc and log_pc
      accel/tcg: Introduce TARGET_TB_PCREL
      target/sh4: Fix TB_FLAG_UNALIGN

From: Alex Bennée <alex.bennee@linaro.org>

The class cast checkers are quite expensive and always on (unlike the
dynamic case whose checks are gated by CONFIG_QOM_CAST_DEBUG). To
avoid the overhead of repeatedly checking something which should never
change we cache the CPUClass reference for use in the hot code paths.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220811151413.3350684-3-alex.bennee@linaro.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-Id: <20220923084803.498337-3-clg@kaod.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/hw/core/cpu.h | 9 +++++++++
 cpu.c                 | 9 ++++-----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -XXX,XX +XXX,XX @@ typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
  */
 #define CPU(obj) ((CPUState *)(obj))
 
+/*
+ * The class checkers bring in CPU_GET_CLASS() which is potentially
+ * expensive given the eventual call to
+ * object_class_dynamic_cast_assert(). Because of this the CPUState
+ * has a cached value for the class in cs->cc which is set up in
+ * cpu_exec_realizefn() for use in hot code paths.
+ */
 typedef struct CPUClass CPUClass;
 DECLARE_CLASS_CHECKERS(CPUClass, CPU,
                        TYPE_CPU)
@@ -XXX,XX +XXX,XX @@ struct qemu_work_item;
 struct CPUState {
     /*< private >*/
     DeviceState parent_obj;
+    /* cache to avoid expensive CPU_GET_CLASS */
+    CPUClass *cc;
     /*< public >*/
 
     int nr_cores;
diff --git a/cpu.c b/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/cpu.c
+++ b/cpu.c
@@ -XXX,XX +XXX,XX @@ const VMStateDescription vmstate_cpu_common = {
 
 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
 {
-#ifndef CONFIG_USER_ONLY
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-#endif
+    /* cache the cpu class for the hotpath */
+    cpu->cc = CPU_GET_CLASS(cpu);
 
     cpu_list_add(cpu);
     if (!accel_cpu_realizefn(cpu, errp)) {
@@ -XXX,XX +XXX,XX @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
         vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
     }
-    if (cc->sysemu_ops->legacy_vmsd != NULL) {
-        vmstate_register(NULL, cpu->cpu_index, cc->sysemu_ops->legacy_vmsd, cpu);
+    if (cpu->cc->sysemu_ops->legacy_vmsd != NULL) {
+        vmstate_register(NULL, cpu->cpu_index, cpu->cc->sysemu_ops->legacy_vmsd, cpu);
     }
 #endif /* CONFIG_USER_ONLY */
 }
-- 
2.34.1

From: Alex Bennée <alex.bennee@linaro.org>

This is a heavily used function so let's avoid the cost of
CPU_GET_CLASS. On the romulus-bmc run it has a modest effect:

Before: 36.812 s ±  0.506 s
  After:  35.912 s ±  0.168 s

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220811151413.3350684-4-alex.bennee@linaro.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-Id: <20220923084803.498337-4-clg@kaod.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 hw/core/cpu-sysemu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/hw/core/cpu-sysemu.c b/hw/core/cpu-sysemu.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/core/cpu-sysemu.c
+++ b/hw/core/cpu-sysemu.c
@@ -XXX,XX +XXX,XX @@ hwaddr cpu_get_phys_page_debug(CPUState *cpu, vaddr addr)
 
 int cpu_asidx_from_attrs(CPUState *cpu, MemTxAttrs attrs)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     int ret = 0;
 
-    if (cc->sysemu_ops->asidx_from_attrs) {
-        ret = cc->sysemu_ops->asidx_from_attrs(cpu, attrs);
+    if (cpu->cc->sysemu_ops->asidx_from_attrs) {
+        ret = cpu->cc->sysemu_ops->asidx_from_attrs(cpu, attrs);
         assert(ret < cpu->num_ases && ret >= 0);
     }
     return ret;
-- 
2.34.1

From: Alex Bennée <alex.bennee@linaro.org>

Use the cached CPUClass (cpu->cc) instead of calling CPU_GET_CLASS()
in the cputlb hot paths.

Before: 35.912 s ±  0.168 s
  After: 35.565 s ±  0.087 s

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220811151413.3350684-5-alex.bennee@linaro.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Message-Id: <20220923084803.498337-5-clg@kaod.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
                      MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
     bool ok;
 
     /*
      * This is not a probe, so only valid return is success; failure
      * should result in exception + longjmp to the cpu loop.
      */
-    ok = cc->tcg_ops->tlb_fill(cpu, addr, size,
-                               access_type, mmu_idx, false, retaddr);
+    ok = cpu->cc->tcg_ops->tlb_fill(cpu, addr, size,
+                                    access_type, mmu_idx, false, retaddr);
     assert(ok);
 }
 
@@ -XXX,XX +XXX,XX @@ static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
                                         MMUAccessType access_type,
                                         int mmu_idx, uintptr_t retaddr)
 {
-    CPUClass *cc = CPU_GET_CLASS(cpu);
-
-    cc->tcg_ops->do_unaligned_access(cpu, addr, access_type, mmu_idx, retaddr);
+    cpu->cc->tcg_ops->do_unaligned_access(cpu, addr, access_type,
+                                          mmu_idx, retaddr);
 }
 
 static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
     if (!tlb_hit_page(tlb_addr, page_addr)) {
         if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
             CPUState *cs = env_cpu(env);
-            CPUClass *cc = CPU_GET_CLASS(cs);
 
-            if (!cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
-                                       mmu_idx, nonfault, retaddr)) {
+            if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
+                                           mmu_idx, nonfault, retaddr)) {
                 /* Non-faulting page table read failed.  */
                 *phost = NULL;
                 return TLB_INVALID_MASK;
-- 
2.34.1

This structure will shortly contain more than just
data for accessing MMIO.  Rename the 'addr' member
to 'xlat_section' to more clearly indicate its purpose.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu-defs.h    |  22 ++++----
 accel/tcg/cputlb.c         | 102 +++++++++++++++++++------------------
 target/arm/mte_helper.c    |  14 ++---
 target/arm/sve_helper.c    |   4 +-
 target/arm/translate-a64.c |   2 +-
 5 files changed, 73 insertions(+), 71 deletions(-)

diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -XXX,XX +XXX,XX @@ typedef uint64_t target_ulong;
 #  endif
 # endif
 
+/* Minimalized TLB entry for use by TCG fast path. */
 typedef struct CPUTLBEntry {
     /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
        bit TARGET_PAGE_BITS-1..4  : Nonzero for accesses that should not
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntry {
 
 QEMU_BUILD_BUG_ON(sizeof(CPUTLBEntry) != (1 << CPU_TLB_ENTRY_BITS));
 
-/* The IOTLB is not accessed directly inline by generated TCG code,
- * so the CPUIOTLBEntry layout is not as critical as that of the
- * CPUTLBEntry. (This is also why we don't want to combine the two
- * structs into one.)
+/*
+ * The full TLB entry, which is not accessed by generated TCG code,
+ * so the layout is not as critical as that of CPUTLBEntry. This is
+ * also why we don't want to combine the two structs.
  */
-typedef struct CPUIOTLBEntry {
+typedef struct CPUTLBEntryFull {
     /*
-     * @addr contains:
+     * @xlat_section contains:
      *  - in the lower TARGET_PAGE_BITS, a physical section number
      *  - with the lower TARGET_PAGE_BITS masked off, an offset which
      *    must be added to the virtual address to obtain:
@@ -XXX,XX +XXX,XX @@ typedef struct CPUIOTLBEntry {
      *       number is PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM)
      *     + the offset within the target MemoryRegion (otherwise)
      */
-    hwaddr addr;
+    hwaddr xlat_section;
     MemTxAttrs attrs;
-} CPUIOTLBEntry;
+} CPUTLBEntryFull;
 
 /*
  * Data elements that are per MMU mode, minus the bits accessed by
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBDesc {
     size_t vindex;
     /* The tlb victim table, in two parts.  */
     CPUTLBEntry vtable[CPU_VTLB_SIZE];
-    CPUIOTLBEntry viotlb[CPU_VTLB_SIZE];
-    /* The iotlb.  */
-    CPUIOTLBEntry *iotlb;
+    CPUTLBEntryFull vfulltlb[CPU_VTLB_SIZE];
+    CPUTLBEntryFull *fulltlb;
 } CPUTLBDesc;
 
 /*
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
     }
 
     g_free(fast->table);
-    g_free(desc->iotlb);
+    g_free(desc->fulltlb);
 
     tlb_window_reset(desc, now, 0);
     /* desc->n_used_entries is cleared by the caller */
     fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
     fast->table = g_try_new(CPUTLBEntry, new_size);
-    desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
+    desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
 
     /*
      * If the allocations fail, try smaller sizes. We just freed some
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
      * allocations to fail though, so we progressively reduce the allocation
      * size, aborting if we cannot even allocate the smallest TLB we support.
      */
-    while (fast->table == NULL || desc->iotlb == NULL) {
+    while (fast->table == NULL || desc->fulltlb == NULL) {
         if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
             error_report("%s: %s", __func__, strerror(errno));
             abort();
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
         fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
 
         g_free(fast->table);
-        g_free(desc->iotlb);
+        g_free(desc->fulltlb);
         fast->table = g_try_new(CPUTLBEntry, new_size);
-        desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
+        desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
     desc->n_used_entries = 0;
     fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
     fast->table = g_new(CPUTLBEntry, n_entries);
-    desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
+    desc->fulltlb = g_new(CPUTLBEntryFull, n_entries);
     tlb_mmu_flush_locked(desc, fast);
 }
 
@@ -XXX,XX +XXX,XX @@ void tlb_destroy(CPUState *cpu)
         CPUTLBDescFast *fast = &env_tlb(env)->f[i];
 
         g_free(fast->table);
-        g_free(desc->iotlb);
+        g_free(desc->fulltlb);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
 
         /* Evict the old entry into the victim tlb.  */
         copy_tlb_helper_locked(tv, te);
-        desc->viotlb[vidx] = desc->iotlb[index];
+        desc->vfulltlb[vidx] = desc->fulltlb[index];
         tlb_n_used_entries_dec(env, mmu_idx);
     }
 
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
      * subtract here is that of the page base, and not the same as the
      * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
      */
-    desc->iotlb[index].addr = iotlb - vaddr_page;
-    desc->iotlb[index].attrs = attrs;
+    desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
+    desc->fulltlb[index].attrs = attrs;
 
     /* Now calculate the new entry */
     tn.addend = addend - vaddr_page;
@@ -XXX,XX +XXX,XX @@ static inline void cpu_transaction_failed(CPUState *cpu, hwaddr physaddr,
     }
 }
 
-static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
+static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
                          int mmu_idx, target_ulong addr, uintptr_t retaddr,
                          MMUAccessType access_type, MemOp op)
 {
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
     bool locked = false;
     MemTxResult r;
 
-    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+    section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
     mr = section->mr;
-    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
     cpu->mem_io_pc = retaddr;
     if (!cpu->can_do_io) {
         cpu_io_recompile(cpu, retaddr);
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
         qemu_mutex_lock_iothread();
         locked = true;
     }
-    r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
+    r = memory_region_dispatch_read(mr, mr_offset, &val, op, full->attrs);
     if (r != MEMTX_OK) {
         hwaddr physaddr = mr_offset +
             section->offset_within_address_space -
             section->offset_within_region;
 
         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
-                               mmu_idx, iotlbentry->attrs, r, retaddr);
+                               mmu_idx, full->attrs, r, retaddr);
     }
     if (locked) {
         qemu_mutex_unlock_iothread();
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
 }
 
 /*
- * Save a potentially trashed IOTLB entry for later lookup by plugin.
- * This is read by tlb_plugin_lookup if the iotlb entry doesn't match
+ * Save a potentially trashed CPUTLBEntryFull for later lookup by plugin.
+ * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
  * because of the side effect of io_writex changing memory layout.
  */
 static void save_iotlb_data(CPUState *cs, hwaddr addr,
@@ -XXX,XX +XXX,XX @@ static void save_iotlb_data(CPUState *cs, hwaddr addr,
 #endif
 }
 
-static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
+static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
                       int mmu_idx, uint64_t val, target_ulong addr,
                       uintptr_t retaddr, MemOp op)
 {
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
     bool locked = false;
     MemTxResult r;
 
-    section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
+    section = iotlb_to_section(cpu, full->xlat_section, full->attrs);
     mr = section->mr;
-    mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+    mr_offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
     if (!cpu->can_do_io) {
         cpu_io_recompile(cpu, retaddr);
     }
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
      * The memory_region_dispatch may trigger a flush/resize
      * so for plugins we save the iotlb_data just in case.
      */
-    save_iotlb_data(cpu, iotlbentry->addr, section, mr_offset);
+    save_iotlb_data(cpu, full->xlat_section, section, mr_offset);
 
     if (!qemu_mutex_iothread_locked()) {
         qemu_mutex_lock_iothread();
         locked = true;
     }
-    r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
+    r = memory_region_dispatch_write(mr, mr_offset, val, op, full->attrs);
     if (r != MEMTX_OK) {
         hwaddr physaddr = mr_offset +
             section->offset_within_address_space -
             section->offset_within_region;
 
         cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
-                               MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
+                               MMU_DATA_STORE, mmu_idx, full->attrs, r,
                                retaddr);
     }
     if (locked) {
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
             copy_tlb_helper_locked(vtlb, &tmptlb);
             qemu_spin_unlock(&env_tlb(env)->c.lock);
 
-            CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
-            CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
-            tmpio = *io; *io = *vio; *vio = tmpio;
+            CPUTLBEntryFull *f1 = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+            CPUTLBEntryFull *f2 = &env_tlb(env)->d[mmu_idx].vfulltlb[vidx];
+            CPUTLBEntryFull tmpf;
+            tmpf = *f1; *f1 = *f2; *f2 = tmpf;
             return true;
         }
     }
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
                  (ADDR) & TARGET_PAGE_MASK)
 
 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
-                           CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
+                           CPUTLBEntryFull *full, uintptr_t retaddr)
 {
-    ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
+    ram_addr_t ram_addr = mem_vaddr + full->xlat_section;
 
     trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
 
@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
     /* Handle clean RAM pages.  */
     if (unlikely(flags & TLB_NOTDIRTY)) {
         uintptr_t index = tlb_index(env, mmu_idx, addr);
-        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+        CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
 
-        notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
+        notdirty_write(env_cpu(env), addr, 1, full, retaddr);
         flags &= ~TLB_NOTDIRTY;
     }
 
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
 
     if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
         uintptr_t index = tlb_index(env, mmu_idx, addr);
-        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+        CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
 
         /* Handle watchpoints.  */
         if (flags & TLB_WATCHPOINT) {
             int wp_access = (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ);
             cpu_check_watchpoint(env_cpu(env), addr, size,
-                                 iotlbentry->attrs, wp_access, retaddr);
+                                 full->attrs, wp_access, retaddr);
         }
 
         /* Handle clean RAM pages.  */
         if (flags & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
+            notdirty_write(env_cpu(env), addr, 1, full, retaddr);
         }
     }
 
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
  * should have just filled the TLB. The one corner case is io_writex
  * which can cause TLB flushes and potential resizing of the TLBs
  * losing the information we need. In those cases we need to recover
- * data from a copy of the iotlbentry. As long as this always occurs
+ * data from a copy of the CPUTLBEntryFull. As long as this always occurs
  * from the same thread (which a mem callback will be) this is safe.
  */
 
@@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
     if (likely(tlb_hit(tlb_addr, addr))) {
         /* We must have an iotlb entry for MMIO */
         if (tlb_addr & TLB_MMIO) {
-            CPUIOTLBEntry *iotlbentry;
-            iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+            CPUTLBEntryFull *full;
+            full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
             data->is_io = true;
-            data->v.io.section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
-            data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+            data->v.io.section =
+                iotlb_to_section(cpu, full->xlat_section, full->attrs);
+            data->v.io.offset = (full->xlat_section & TARGET_PAGE_MASK) + addr;
         } else {
             data->is_io = false;
             data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
 
     if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
         notdirty_write(env_cpu(env), addr, size,
-                       &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
+                       &env_tlb(env)->d[mmu_idx].fulltlb[index], retaddr);
     }
 
     return hostaddr;
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
 
     /* Handle anything that isn't just a straight memory access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        CPUIOTLBEntry *iotlbentry;
+        CPUTLBEntryFull *full;
         bool need_swap;
 
         /* For anything that is unaligned, recurse through full_load.  */
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
             goto do_unaligned_access;
         }
 
-        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+        full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
 
         /* Handle watchpoints.  */
         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
             /* On watchpoint hit, this will longjmp out.  */
             cpu_check_watchpoint(env_cpu(env), addr, size,
-                                 iotlbentry->attrs, BP_MEM_READ, retaddr);
+                                 full->attrs, BP_MEM_READ, retaddr);
         }
 
         need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
 
         /* Handle I/O access.  */
         if (likely(tlb_addr & TLB_MMIO)) {
-            return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
+            return io_readx(env, full, mmu_idx, addr, retaddr,
                             access_type, op ^ (need_swap * MO_BSWAP));
         }
 
@@ -XXX,XX +XXX,XX @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
      */
     if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
         cpu_check_watchpoint(env_cpu(env), addr, size - size2,
-                             env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
+                             env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
                              BP_MEM_WRITE, retaddr);
     }
     if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
         cpu_check_watchpoint(env_cpu(env), page2, size2,
-                             env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
+                             env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
                              BP_MEM_WRITE, retaddr);
     }
 
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
 
     /* Handle anything that isn't just a straight memory access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        CPUIOTLBEntry *iotlbentry;
+        CPUTLBEntryFull *full;
         bool need_swap;
 
         /* For anything that is unaligned, recurse through byte stores.  */
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
             goto do_unaligned_access;
         }
 
-        iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+        full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
 
         /* Handle watchpoints.  */
         if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
             /* On watchpoint hit, this will longjmp out.  */
             cpu_check_watchpoint(env_cpu(env), addr, size,
-                                 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
+                                 full->attrs, BP_MEM_WRITE, retaddr);
         }
 
         need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
 
         /* Handle I/O access.  */
         if (tlb_addr & TLB_MMIO) {
-            io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
+            io_writex(env, full, mmu_idx, val, addr, retaddr,
                       op ^ (need_swap * MO_BSWAP));
             return;
         }
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
 
         /* Handle clean RAM pages.  */
         if (tlb_addr & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
+            notdirty_write(env_cpu(env), addr, size, full, retaddr);
         }
 
         haddr = (void *)((uintptr_t)addr + entry->addend);
diff --git a/target/arm/mte_helper.c b/target/arm/mte_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/mte_helper.c
+++ b/target/arm/mte_helper.c
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
     return tags + index;
 #else
     uintptr_t index;
-    CPUIOTLBEntry *iotlbentry;
+    CPUTLBEntryFull *full;
     int in_page, flags;
     ram_addr_t ptr_ra;
     hwaddr ptr_paddr, tag_paddr, xlat;
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
     assert(!(flags & TLB_INVALID_MASK));
 
     /*
-     * Find the iotlbentry for ptr.  This *must* be present in the TLB
+     * Find the CPUTLBEntryFull for ptr.  This *must* be present in the TLB
      * because we just found the mapping.
      * TODO: Perhaps there should be a cputlb helper that returns a
      * matching tlb entry + iotlb entry.
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
         g_assert(tlb_hit(comparator, ptr));
     }
 # endif
-    iotlbentry = &env_tlb(env)->d[ptr_mmu_idx].iotlb[index];
+    full = &env_tlb(env)->d[ptr_mmu_idx].fulltlb[index];
 
     /* If the virtual page MemAttr != Tagged, access unchecked. */
-    if (!arm_tlb_mte_tagged(&iotlbentry->attrs)) {
+    if (!arm_tlb_mte_tagged(&full->attrs)) {
         return NULL;
     }
 
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
         int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE;
         assert(ra != 0);
         cpu_check_watchpoint(env_cpu(env), ptr, ptr_size,
-                             iotlbentry->attrs, wp, ra);
+                             full->attrs, wp, ra);
     }
 
     /*
@@ -XXX,XX +XXX,XX @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx,
     tag_paddr = ptr_paddr >> (LOG2_TAG_GRANULE + 1);
 
     /* Look up the address in tag space. */
-    tag_asi = iotlbentry->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
+    tag_asi = full->attrs.secure ? ARMASIdx_TagS : ARMASIdx_TagNS;
     tag_as = cpu_get_address_space(env_cpu(env), tag_asi);
     mr = address_space_translate(tag_as, tag_paddr, &xlat, NULL,
                                  tag_access == MMU_DATA_STORE,
-                                 iotlbentry->attrs);
+                                 full->attrs);
 
     /*
      * Note that @mr will never be NULL.  If there is nothing in the address
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -XXX,XX +XXX,XX @@ bool sve_probe_page(SVEHostPage *info, bool nofault, CPUARMState *env,
         g_assert(tlb_hit(comparator, addr));
 # endif
 
-        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
-        info->attrs = iotlbentry->attrs;
+        CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+        info->attrs = full->attrs;
     }
 #endif
 
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -XXX,XX +XXX,XX @@ static bool is_guarded_page(CPUARMState *env, DisasContext *s)
      * table entry even for that case.
      */
     return (tlb_hit(entry->addr_code, addr) &&
-            arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].iotlb[index].attrs));
+            arm_tlb_bti_gp(&env_tlb(env)->d[mmu_idx].fulltlb[index].attrs));
 #endif
 }
 
-- 
2.34.1

This field is only written, not read; remove it.

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -XXX,XX +XXX,XX @@ struct CPUWatchpoint {
  * the memory regions get moved around  by io_writex.
  */
 typedef struct SavedIOTLB {
-    hwaddr addr;
     MemoryRegionSection *section;
     hwaddr mr_offset;
 } SavedIOTLB;
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static uint64_t io_readx(CPUArchState *env, CPUTLBEntryFull *full,
  * This is read by tlb_plugin_lookup if the fulltlb entry doesn't match
  * because of the side effect of io_writex changing memory layout.
  */
-static void save_iotlb_data(CPUState *cs, hwaddr addr,
-                            MemoryRegionSection *section, hwaddr mr_offset)
+static void save_iotlb_data(CPUState *cs, MemoryRegionSection *section,
+                            hwaddr mr_offset)
 {
 #ifdef CONFIG_PLUGIN
     SavedIOTLB *saved = &cs->saved_iotlb;
-    saved->addr = addr;
     saved->section = section;
     saved->mr_offset = mr_offset;
 #endif
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
      * The memory_region_dispatch may trigger a flush/resize
      * so for plugins we save the iotlb_data just in case.
      */
-    save_iotlb_data(cpu, full->xlat_section, section, mr_offset);
+    save_iotlb_data(cpu, section, mr_offset);
 
     if (!qemu_mutex_iothread_locked()) {
         qemu_mutex_lock_iothread();
-- 
2.34.1

If PAGE_WRITE_INV is set when calling tlb_set_page,
we immediately set TLB_INVALID_MASK in order to force
tlb_fill to be called on the next lookup.  Here in
probe_access_internal, we have just called tlb_fill
and eliminated true misses, thus the lookup must be valid.

This allows us to remove a warning comment from s390x.
There doesn't seem to be a reason to change the code though.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c            | 10 +++++++++-
 target/s390x/tcg/mem_helper.c |  4 ----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
     }
     tlb_addr = tlb_read_ofs(entry, elt_ofs);
 
+    flags = TLB_FLAGS_MASK;
     page_addr = addr & TARGET_PAGE_MASK;
     if (!tlb_hit_page(tlb_addr, page_addr)) {
         if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
 
             /* TLB resize via tlb_fill may have moved the entry.  */
             entry = tlb_entry(env, mmu_idx, addr);
+
+            /*
+             * With PAGE_WRITE_INV, we set TLB_INVALID_MASK immediately,
+             * to force the next access through tlb_fill.  We've just
+             * called tlb_fill, so we know that this entry *is* valid.
+             */
+            flags &= ~TLB_INVALID_MASK;
         }
         tlb_addr = tlb_read_ofs(entry, elt_ofs);
     }
-    flags = tlb_addr & TLB_FLAGS_MASK;
+    flags &= tlb_addr;
 
     /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
     if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -XXX,XX +XXX,XX @@ static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
 #else
     int flags;
 
-    /*
-     * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
-     * to detect if there was an exception during tlb_fill().
-     */
     env->tlb_fill_exc = 0;
     flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
                                ra);
-- 
2.34.1

Add an interface to return the CPUTLBEntryFull struct
that goes with the lookup.  The result is not intended
to be valid across multiple lookups, so the user must
use the results immediately.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/exec-all.h | 15 +++++++++++++
 include/qemu/typedefs.h |  1 +
 accel/tcg/cputlb.c      | 47 +++++++++++++++++++++++++----------------
 3 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -XXX,XX +XXX,XX @@ int probe_access_flags(CPUArchState *env, target_ulong addr,
                        MMUAccessType access_type, int mmu_idx,
                        bool nonfault, void **phost, uintptr_t retaddr);
 
+#ifndef CONFIG_USER_ONLY
+/**
+ * probe_access_full:
+ * Like probe_access_flags, except also return into @pfull.
+ *
+ * The CPUTLBEntryFull structure returned via @pfull is transient
+ * and must be consumed or copied immediately, before any further
+ * access or changes to TLB @mmu_idx.
+ */
+int probe_access_full(CPUArchState *env, target_ulong addr,
+                      MMUAccessType access_type, int mmu_idx,
+                      bool nonfault, void **phost,
+                      CPUTLBEntryFull **pfull, uintptr_t retaddr);
+#endif
+
 #define CODE_GEN_ALIGN           16 /* must be >= of the size of a icache line */
 
 /* Estimated block size for TB allocation.  */
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -XXX,XX +XXX,XX @@ typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
 typedef struct CPUAddressSpace CPUAddressSpace;
 typedef struct CPUArchState CPUArchState;
 typedef struct CPUState CPUState;
+typedef struct CPUTLBEntryFull CPUTLBEntryFull;
 typedef struct DeviceListener DeviceListener;
 typedef struct DeviceState DeviceState;
 typedef struct DirtyBitmapSnapshot DirtyBitmapSnapshot;
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
 static int probe_access_internal(CPUArchState *env, target_ulong addr,
                                  int fault_size, MMUAccessType access_type,
                                  int mmu_idx, bool nonfault,
-                                 void **phost, uintptr_t retaddr)
+                                 void **phost, CPUTLBEntryFull **pfull,
+                                 uintptr_t retaddr)
 {
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
                                            mmu_idx, nonfault, retaddr)) {
                 /* Non-faulting page table read failed.  */
                 *phost = NULL;
+                *pfull = NULL;
                 return TLB_INVALID_MASK;
             }
 
             /* TLB resize via tlb_fill may have moved the entry.  */
+            index = tlb_index(env, mmu_idx, addr);
             entry = tlb_entry(env, mmu_idx, addr);
 
             /*
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
     }
     flags &= tlb_addr;
 
+    *pfull = &env_tlb(env)->d[mmu_idx].fulltlb[index];
+
     /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
     if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
         *phost = NULL;
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
     return flags;
 }
 
-int probe_access_flags(CPUArchState *env, target_ulong addr,
-                       MMUAccessType access_type, int mmu_idx,
-                       bool nonfault, void **phost, uintptr_t retaddr)
+int probe_access_full(CPUArchState *env, target_ulong addr,
+                      MMUAccessType access_type, int mmu_idx,
+                      bool nonfault, void **phost, CPUTLBEntryFull **pfull,
+                      uintptr_t retaddr)
 {
-    int flags;
-
-    flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
-                                  nonfault, phost, retaddr);
+    int flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
+                                      nonfault, phost, pfull, retaddr);
 
     /* Handle clean RAM pages.  */
     if (unlikely(flags & TLB_NOTDIRTY)) {
-        uintptr_t index = tlb_index(env, mmu_idx, addr);
-        CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
-
-        notdirty_write(env_cpu(env), addr, 1, full, retaddr);
+        notdirty_write(env_cpu(env), addr, 1, *pfull, retaddr);
         flags &= ~TLB_NOTDIRTY;
     }
 
     return flags;
 }
 
+int probe_access_flags(CPUArchState *env, target_ulong addr,
+                       MMUAccessType access_type, int mmu_idx,
+                       bool nonfault, void **phost, uintptr_t retaddr)
+{
+    CPUTLBEntryFull *full;
+
+    return probe_access_full(env, addr, access_type, mmu_idx,
+                             nonfault, phost, &full, retaddr);
+}
+
 void *probe_access(CPUArchState *env, target_ulong addr, int size,
                    MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
 {
+    CPUTLBEntryFull *full;
     void *host;
     int flags;
 
     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
 
     flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
-                                  false, &host, retaddr);
+                                  false, &host, &full, retaddr);
 
     /* Per the interface, size == 0 merely faults the access. */
     if (size == 0) {
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
     }
 
     if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
-        uintptr_t index = tlb_index(env, mmu_idx, addr);
-        CPUTLBEntryFull *full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
-
         /* Handle watchpoints.  */
         if (flags & TLB_WATCHPOINT) {
             int wp_access = (access_type == MMU_DATA_STORE
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
                         MMUAccessType access_type, int mmu_idx)
 {
+    CPUTLBEntryFull *full;
     void *host;
     int flags;
 
     flags = probe_access_internal(env, addr, 0, access_type,
-                                  mmu_idx, true, &host, 0);
+                                  mmu_idx, true, &host, &full, 0);
 
     /* No combination of flags are expected by the caller. */
     return flags ? NULL : host;
@@ -XXX,XX +XXX,XX @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
                                         void **hostp)
 {
+    CPUTLBEntryFull *full;
     void *p;
 
     (void)probe_access_internal(env, addr, 1, MMU_INST_FETCH,
-                                cpu_mmu_index(env, true), false, &p, 0);
+                                cpu_mmu_index(env, true), false, &p, &full, 0);
     if (p == NULL) {
         return -1;
     }
-- 
2.34.1

Now that we have collected all of the page data into
CPUTLBEntryFull, provide an interface to record that
all in one go, instead of using 4 arguments.  This interface
allows CPUTLBEntryFull to be extended without having to
change the number of arguments.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu-defs.h | 14 +++++++++++
 include/exec/exec-all.h | 22 ++++++++++++++++++
 accel/tcg/cputlb.c      | 51 ++++++++++++++++++++++++++---------------
 3 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntryFull {
      *     + the offset within the target MemoryRegion (otherwise)
      */
     hwaddr xlat_section;
+
+    /*
+     * @phys_addr contains the physical address in the address space
+     * given by cpu_asidx_from_attrs(cpu, @attrs).
+     */
+    hwaddr phys_addr;
+
+    /* @attrs contains the memory transaction attributes for the page. */
     MemTxAttrs attrs;
+
+    /* @prot contains the complete protections for the page. */
+    uint8_t prot;
+
+    /* @lg_page_size contains the log2 of the page size. */
+    uint8_t lg_page_size;
 } CPUTLBEntryFull;
 
 /*
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -XXX,XX +XXX,XX @@ void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *cpu,
                                                uint16_t idxmap,
                                                unsigned bits);
 
+/**
+ * tlb_set_page_full:
+ * @cpu: CPU context
+ * @mmu_idx: mmu index of the tlb to modify
+ * @vaddr: virtual address of the entry to add
+ * @full: the details of the tlb entry
+ *
+ * Add an entry to @cpu tlb index @mmu_idx.  All of the fields of
+ * @full must be filled, except for xlat_section, and constitute
+ * the complete description of the translated page.
+ *
+ * This is generally called by the target tlb_fill function after
+ * having performed a successful page table walk to find the physical
+ * address and attributes for the translation.
+ *
+ * At most one entry for a given virtual address is permitted. Only a
+ * single TARGET_PAGE_SIZE region is mapped; @full->lg_page_size is only
+ * used by tlb_flush_page.
+ */
+void tlb_set_page_full(CPUState *cpu, int mmu_idx, target_ulong vaddr,
+                       CPUTLBEntryFull *full);
+
 /**
  * tlb_set_page_with_attrs:
  * @cpu: CPU to add this TLB entry for
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
     env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
 }
 
-/* Add a new TLB entry. At most one entry for a given virtual address
+/*
+ * Add a new TLB entry. At most one entry for a given virtual address
  * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
  * supplied size is only used by tlb_flush_page.
  *
  * Called from TCG-generated code, which is under an RCU read-side
  * critical section.
  */
-void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
-                             hwaddr paddr, MemTxAttrs attrs, int prot,
-                             int mmu_idx, target_ulong size)
+void tlb_set_page_full(CPUState *cpu, int mmu_idx,
+                       target_ulong vaddr, CPUTLBEntryFull *full)
 {
     CPUArchState *env = cpu->env_ptr;
     CPUTLB *tlb = env_tlb(env);
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
     CPUTLBEntry *te, tn;
     hwaddr iotlb, xlat, sz, paddr_page;
     target_ulong vaddr_page;
-    int asidx = cpu_asidx_from_attrs(cpu, attrs);
-    int wp_flags;
+    int asidx, wp_flags, prot;
     bool is_ram, is_romd;
 
     assert_cpu_is_self(cpu);
 
-    if (size <= TARGET_PAGE_SIZE) {
+    if (full->lg_page_size <= TARGET_PAGE_BITS) {
         sz = TARGET_PAGE_SIZE;
     } else {
-        tlb_add_large_page(env, mmu_idx, vaddr, size);
-        sz = size;
+        sz = (hwaddr)1 << full->lg_page_size;
+        tlb_add_large_page(env, mmu_idx, vaddr, sz);
     }
     vaddr_page = vaddr & TARGET_PAGE_MASK;
-    paddr_page = paddr & TARGET_PAGE_MASK;
+    paddr_page = full->phys_addr & TARGET_PAGE_MASK;
 
+    prot = full->prot;
+    asidx = cpu_asidx_from_attrs(cpu, full->attrs);
     section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
-                                                &xlat, &sz, attrs, &prot);
+                                                &xlat, &sz, full->attrs, &prot);
     assert(sz >= TARGET_PAGE_SIZE);
 
     tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
               " prot=%x idx=%d\n",
-              vaddr, paddr, prot, mmu_idx);
+              vaddr, full->phys_addr, prot, mmu_idx);
 
     address = vaddr_page;
-    if (size < TARGET_PAGE_SIZE) {
+    if (full->lg_page_size < TARGET_PAGE_BITS) {
         /* Repeat the MMU check and TLB fill on every access.  */
         address |= TLB_INVALID_MASK;
     }
-    if (attrs.byte_swap) {
+    if (full->attrs.byte_swap) {
         address |= TLB_BSWAP;
     }
 
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
      * subtract here is that of the page base, and not the same as the
      * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
      */
+    desc->fulltlb[index] = *full;
     desc->fulltlb[index].xlat_section = iotlb - vaddr_page;
-    desc->fulltlb[index].attrs = attrs;
+    desc->fulltlb[index].phys_addr = paddr_page;
+    desc->fulltlb[index].prot = prot;
 
     /* Now calculate the new entry */
     tn.addend = addend - vaddr_page;
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
     qemu_spin_unlock(&tlb->c.lock);
 }
 
-/* Add a new TLB entry, but without specifying the memory
- * transaction attributes to be used.
- */
+void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
+                             hwaddr paddr, MemTxAttrs attrs, int prot,
+                             int mmu_idx, target_ulong size)
+{
+    CPUTLBEntryFull full = {
+        .phys_addr = paddr,
+        .attrs = attrs,
+        .prot = prot,
+        .lg_page_size = ctz64(size)
+    };
+
+    assert(is_power_of_2(size));
+    tlb_set_page_full(cpu, mmu_idx, vaddr, &full);
+}
+
 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                   hwaddr paddr, int prot,
                   int mmu_idx, target_ulong size)
-- 
2.34.1

The PageDesc code_bitmap is created and discarded immediately.
We gain nothing by its existence.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220822232338.1727934-2-richard.henderson@linaro.org>
---
 accel/tcg/translate-all.c | 78 ++-------------------------------------
 1 file changed, 4 insertions(+), 74 deletions(-)

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@
 #define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
 #endif
 
-#define SMC_BITMAP_USE_THRESHOLD 10
-
 typedef struct PageDesc {
     /* list of TBs intersecting this ram page */
     uintptr_t first_tb;
-#ifdef CONFIG_SOFTMMU
-    /* in order to optimize self modifying code, we count the number
-       of lookups we do to a given page to use a bitmap */
-    unsigned long *code_bitmap;
-    unsigned int code_write_count;
-#else
+#ifdef CONFIG_USER_ONLY
     unsigned long flags;
     void *target_data;
 #endif
-#ifndef CONFIG_USER_ONLY
+#ifdef CONFIG_SOFTMMU
     QemuSpin lock;
 #endif
 } PageDesc;
@@ -XXX,XX +XXX,XX @@ void tb_htable_init(void)
     qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
 }
 
-/* call with @p->lock held */
-static inline void invalidate_page_bitmap(PageDesc *p)
-{
-    assert_page_locked(p);
-#ifdef CONFIG_SOFTMMU
-    g_free(p->code_bitmap);
-    p->code_bitmap = NULL;
-    p->code_write_count = 0;
-#endif
-}
-
 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
 static void page_flush_tb_1(int level, void **lp)
 {
@@ -XXX,XX +XXX,XX @@ static void page_flush_tb_1(int level, void **lp)
         for (i = 0; i < V_L2_SIZE; ++i) {
             page_lock(&pd[i]);
             pd[i].first_tb = (uintptr_t)NULL;
-            invalidate_page_bitmap(pd + i);
             page_unlock(&pd[i]);
         }
     } else {
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
     if (rm_from_page_list) {
         p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
         tb_page_remove(p, tb);
-        invalidate_page_bitmap(p);
         if (tb->page_addr[1] != -1) {
             p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
             tb_page_remove(p, tb);
-            invalidate_page_bitmap(p);
         }
     }
 
@@ -XXX,XX +XXX,XX @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     }
 }
 
-#ifdef CONFIG_SOFTMMU
-/* call with @p->lock held */
-static void build_page_bitmap(PageDesc *p)
-{
-    int n, tb_start, tb_end;
-    TranslationBlock *tb;
-
-    assert_page_locked(p);
-    p->code_bitmap = bitmap_new(TARGET_PAGE_SIZE);
-
-    PAGE_FOR_EACH_TB(p, tb, n) {
-        /* NOTE: this is subtle as a TB may span two physical pages */
-        if (n == 0) {
-            /* NOTE: tb_end may be after the end of the page, but
-               it is not a problem */
-            tb_start = tb->pc & ~TARGET_PAGE_MASK;
-            tb_end = tb_start + tb->size;
-            if (tb_end > TARGET_PAGE_SIZE) {
-                tb_end = TARGET_PAGE_SIZE;
-             }
-        } else {
-            tb_start = 0;
-            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
-        }
-        bitmap_set(p->code_bitmap, tb_start, tb_end - tb_start);
-    }
-}
-#endif
-
 /* add the tb in the target page and protect it if necessary
  *
  * Called with mmap_lock held for user-mode emulation.
@@ -XXX,XX +XXX,XX @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
     page_already_protected = p->first_tb != (uintptr_t)NULL;
 #endif
     p->first_tb = (uintptr_t)tb | n;
-    invalidate_page_bitmap(p);
 
 #if defined(CONFIG_USER_ONLY)
     /* translator_loop() must have made all TB pages non-writable */
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     /* remove TB from the page(s) if we couldn't insert it */
     if (unlikely(existing_tb)) {
         tb_page_remove(p, tb);
-        invalidate_page_bitmap(p);
         if (p2) {
             tb_page_remove(p2, tb);
-            invalidate_page_bitmap(p2);
         }
         tb = existing_tb;
     }
@@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
 #if !defined(CONFIG_USER_ONLY)
     /* if no code remaining, no need to continue to use slow writes */
     if (!p->first_tb) {
-        invalidate_page_bitmap(p);
         tlb_unprotect_code(start);
     }
 #endif
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_page_fast(struct page_collection *pages,
     }
 
     assert_page_locked(p);
-    if (!p->code_bitmap &&
-        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
-        build_page_bitmap(p);
-    }
-    if (p->code_bitmap) {
-        unsigned int nr;
-        unsigned long b;
-
-        nr = start & ~TARGET_PAGE_MASK;
-        b = p->code_bitmap[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1));
-        if (b & ((1 << len) - 1)) {
-            goto do_invalidate;
-        }
-    } else {
-    do_invalidate:
-        tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
-                                              retaddr);
-    }
+    tb_invalidate_phys_page_range__locked(pages, p, start, start + len,
+                                          retaddr);
 }
 #else
 /* Called with mmap_lock held. If pc is not 0 then it indicates the
-- 
2.34.1

Bool is a more appropriate type for the alloc parameter.

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ void page_init(void)
 #endif
 }
 
-static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
+static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
 {
     PageDesc *pd;
     void **lp;
@@ -XXX,XX +XXX,XX @@ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
 
 static inline PageDesc *page_find(tb_page_addr_t index)
 {
-    return page_find_alloc(index, 0);
+    return page_find_alloc(index, false);
 }
 
 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
-                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc);
+                           PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc);
 
 /* In user-mode page locks aren't used; mmap_lock is enough */
 #ifdef CONFIG_USER_ONLY
@@ -XXX,XX +XXX,XX @@ static inline void page_unlock(PageDesc *pd)
 /* lock the page(s) of a TB in the correct acquisition order */
 static inline void page_lock_tb(const TranslationBlock *tb)
 {
-    page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], 0);
+    page_lock_pair(NULL, tb->page_addr[0], NULL, tb->page_addr[1], false);
 }
 
 static inline void page_unlock_tb(const TranslationBlock *tb)
@@ -XXX,XX +XXX,XX @@ void page_collection_unlock(struct page_collection *set)
 #endif /* !CONFIG_USER_ONLY */
 
 static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
-                           PageDesc **ret_p2, tb_page_addr_t phys2, int alloc)
+                           PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc)
 {
     PageDesc *p1, *p2;
     tb_page_addr_t page1;
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
      * Note that inserting into the hash table first isn't an option, since
      * we can only insert TBs that are fully initialized.
      */
-    page_lock_pair(&p, phys_pc, &p2, phys_page2, 1);
+    page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
     tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
     if (p2) {
         tb_page_add(p2, tb, 1, phys_page2);
@@ -XXX,XX +XXX,XX @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
     for (addr = start, len = end - start;
          len != 0;
          len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
-        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
+        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);
 
         /* If the write protection bit is set, then we invalidate
            the code inside.  */
-- 
2.34.1

Use the pc coming from db->pc_first rather than the TB.

Use the cached host_addr rather than re-computing for the
first page.  We still need a separate lookup for the second
page because it won't be computed for DisasContextBase until
the translator actually performs a read from the page.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/plugin-gen.h |  7 ++++---
 accel/tcg/plugin-gen.c    | 22 +++++++++++-----------
 accel/tcg/translator.c    |  2 +-
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/plugin-gen.h
+++ b/include/exec/plugin-gen.h
@@ -XXX,XX +XXX,XX @@ struct DisasContextBase;
 
 #ifdef CONFIG_PLUGIN
 
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress);
+bool plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db,
+                         bool supress);
 void plugin_gen_tb_end(CPUState *cpu);
 void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
 void plugin_gen_insn_end(void);
@@ -XXX,XX +XXX,XX @@ static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
 
 #else /* !CONFIG_PLUGIN */
 
-static inline
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool supress)
+static inline bool
+plugin_gen_tb_start(CPUState *cpu, const struct DisasContextBase *db, bool sup)
 {
     return false;
 }
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_inject(const struct qemu_plugin_tb *plugin_tb)
     pr_ops();
 }
 
-bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_only)
+bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db,
+                         bool mem_only)
 {
     bool ret = false;
 
@@ -XXX,XX +XXX,XX @@ bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool mem_onl
 
         ret = true;
 
-        ptb->vaddr = tb->pc;
+        ptb->vaddr = db->pc_first;
         ptb->vaddr2 = -1;
-        get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1);
+        ptb->haddr1 = db->host_addr[0];
         ptb->haddr2 = NULL;
         ptb->mem_only = mem_only;
 
@@ -XXX,XX +XXX,XX @@ void plugin_gen_insn_start(CPUState *cpu, const DisasContextBase *db)
      * Note that we skip this when haddr1 == NULL, e.g. when we're
      * fetching instructions from a region not backed by RAM.
      */
-    if (likely(ptb->haddr1 != NULL && ptb->vaddr2 == -1) &&
-        unlikely((db->pc_next & TARGET_PAGE_MASK) !=
-                 (db->pc_first & TARGET_PAGE_MASK))) {
-        get_page_addr_code_hostp(cpu->env_ptr, db->pc_next,
-                                 &ptb->haddr2);
-        ptb->vaddr2 = db->pc_next;
-    }
-    if (likely(ptb->vaddr2 == -1)) {
+    if (ptb->haddr1 == NULL) {
+        pinsn->haddr = NULL;
+    } else if (is_same_page(db, db->pc_next)) {
         pinsn->haddr = ptb->haddr1 + pinsn->vaddr - ptb->vaddr;
     } else {
+        if (ptb->vaddr2 == -1) {
+            ptb->vaddr2 = TARGET_PAGE_ALIGN(db->pc_first);
+            get_page_addr_code_hostp(cpu->env_ptr, ptb->vaddr2, &ptb->haddr2);
+        }
         pinsn->haddr = ptb->haddr2 + pinsn->vaddr - ptb->vaddr2;
     }
 }
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
     ops->tb_start(db, cpu);
     tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
 
-    plugin_enabled = plugin_gen_tb_start(cpu, tb, cflags & CF_MEMI_ONLY);
+    plugin_enabled = plugin_gen_tb_start(cpu, db, cflags & CF_MEMI_ONLY);
 
     while (true) {
         db->num_insns++;
-- 
2.34.1

Let tb->page_addr[0] contain the address of the first byte of the
translated block, rather than the address of the page containing the
start of the translated block.  We need to recover this value anyway
at various points, and it is easier to discard a page offset when it
is not needed, which happens naturally via the existing page_find shift.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cpu-exec.c      | 16 ++++++++--------
 accel/tcg/cputlb.c        |  3 ++-
 accel/tcg/translate-all.c |  9 +++++----
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -XXX,XX +XXX,XX @@ struct tb_desc {
     target_ulong pc;
     target_ulong cs_base;
     CPUArchState *env;
-    tb_page_addr_t phys_page1;
+    tb_page_addr_t page_addr0;
     uint32_t flags;
     uint32_t cflags;
     uint32_t trace_vcpu_dstate;
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
     const struct tb_desc *desc = d;
 
     if (tb->pc == desc->pc &&
-        tb->page_addr[0] == desc->phys_page1 &&
+        tb->page_addr[0] == desc->page_addr0 &&
         tb->cs_base == desc->cs_base &&
         tb->flags == desc->flags &&
         tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
         if (tb->page_addr[1] == -1) {
             return true;
         } else {
-            tb_page_addr_t phys_page2;
-            target_ulong virt_page2;
+            tb_page_addr_t phys_page1;
+            target_ulong virt_page1;
 
             /*
              * We know that the first page matched, and an otherwise valid TB
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
              * is different for the new TB.  Therefore any exception raised
              * here by the faulting lookup is not premature.
              */
-            virt_page2 = TARGET_PAGE_ALIGN(desc->pc);
-            phys_page2 = get_page_addr_code(desc->env, virt_page2);
-            if (tb->page_addr[1] == phys_page2) {
+            virt_page1 = TARGET_PAGE_ALIGN(desc->pc);
+            phys_page1 = get_page_addr_code(desc->env, virt_page1);
+            if (tb->page_addr[1] == phys_page1) {
                 return true;
             }
         }
@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
     if (phys_pc == -1) {
         return NULL;
     }
-    desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
+    desc.page_addr0 = phys_pc;
     h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
     return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
 }
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
    can be detected */
 void tlb_protect_code(ram_addr_t ram_addr)
 {
-    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
+    cpu_physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
+                                             TARGET_PAGE_SIZE,
                                              DIRTY_MEMORY_CODE);
 }
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
     qemu_spin_unlock(&tb->jmp_lock);
 
     /* remove the TB from the hash list */
-    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+    phys_pc = tb->page_addr[0];
     h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
                      tb->trace_vcpu_dstate);
     if (!qht_remove(&tb_ctx.htable, tb, h)) {
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
      * we can only insert TBs that are fully initialized.
      */
     page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
-    tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK);
+    tb_page_add(p, tb, 0, phys_pc);
     if (p2) {
         tb_page_add(p2, tb, 1, phys_page2);
     } else {
@@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
         if (n == 0) {
             /* NOTE: tb_end may be after the end of the page, but
                it is not a problem */
-            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
+            tb_start = tb->page_addr[0];
             tb_end = tb_start + tb->size;
         } else {
             tb_start = tb->page_addr[1];
-            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
+            tb_end = tb_start + ((tb->page_addr[0] + tb->size)
+                                 & ~TARGET_PAGE_MASK);
         }
         if (!(tb_end <= start || tb_start >= end)) {
 #ifdef TARGET_HAS_PRECISE_SMC
-- 
2.34.1

This function, tb_flush_jmp_cache, has two users, who use it incompatibly.
In tlb_flush_page_by_mmuidx_async_0, when flushing a
single page, we need to flush exactly two pages.
In tlb_flush_range_by_mmuidx_async_0, when flushing a
range of pages, we need to flush N+1 pages.

This avoids double-flushing of jmp cache pages in a range.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
     }
 }
 
-static void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
-{
-    /* Discard jump cache entries for any tb which might potentially
-       overlap the flushed page.  */
-    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
-    tb_jmp_cache_clear_page(cpu, addr);
-}
-
 /**
  * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
  * @desc: The CPUTLBDesc portion of the TLB
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
     }
     qemu_spin_unlock(&env_tlb(env)->c.lock);
 
-    tb_flush_jmp_cache(cpu, addr);
+    /*
+     * Discard jump cache entries for any tb which might potentially
+     * overlap the flushed page, which includes the previous page.
+     */
+    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
+    tb_jmp_cache_clear_page(cpu, addr);
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
         return;
     }
 
-    for (target_ulong i = 0; i < d.len; i += TARGET_PAGE_SIZE) {
-        tb_flush_jmp_cache(cpu, d.addr + i);
+    /*
+     * Discard jump cache entries for any tb which might potentially
+     * overlap the flushed pages, which includes the previous page.
+     */
+    d.addr -= TARGET_PAGE_SIZE;
+    for (target_ulong i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) {
+        tb_jmp_cache_clear_page(cpu, d.addr);
+        d.addr += TARGET_PAGE_SIZE;
     }
 }
 
-- 
2.34.1

Wrap the bare TranslationBlock pointer into a structure.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tb-hash.h       |  1 +
 accel/tcg/tb-jmp-cache.h  | 24 ++++++++++++++++++++++++
 include/exec/cpu-common.h |  1 +
 include/hw/core/cpu.h     | 15 +--------------
 include/qemu/typedefs.h   |  1 +
 accel/stubs/tcg-stub.c    |  4 ++++
 accel/tcg/cpu-exec.c      | 10 +++++++---
 accel/tcg/cputlb.c        |  9 +++++----
 accel/tcg/translate-all.c | 28 +++++++++++++++++++++++++---
 hw/core/cpu-common.c      |  3 +--
 plugins/core.c            |  2 +-
 trace/control-target.c    |  2 +-
 12 files changed, 72 insertions(+), 28 deletions(-)
 create mode 100644 accel/tcg/tb-jmp-cache.h

diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -XXX,XX +XXX,XX @@
 #include "exec/cpu-defs.h"
 #include "exec/exec-all.h"
 #include "qemu/xxhash.h"
+#include "tb-jmp-cache.h"
 
 #ifdef CONFIG_SOFTMMU
 
diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/accel/tcg/tb-jmp-cache.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * The per-CPU TranslationBlock jump cache.
+ *
+ *  Copyright (c) 2003 Fabrice Bellard
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef ACCEL_TCG_TB_JMP_CACHE_H
+#define ACCEL_TCG_TB_JMP_CACHE_H
+
+#define TB_JMP_CACHE_BITS 12
+#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
+
+/*
+ * Accessed in parallel; all accesses to 'tb' must be atomic.
+ */
+struct CPUJumpCache {
+    struct {
+        TranslationBlock *tb;
+    } array[TB_JMP_CACHE_SIZE];
+};
+
+#endif /* ACCEL_TCG_TB_JMP_CACHE_H */
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -XXX,XX +XXX,XX @@ void cpu_list_unlock(void);
 unsigned int cpu_list_generation_id_get(void);
 
 void tcg_flush_softmmu_tlb(CPUState *cs);
+void tcg_flush_jmp_cache(CPUState *cs);
 
 void tcg_iommu_init_notifier_list(CPUState *cpu);
 void tcg_iommu_free_notifier_list(CPUState *cpu);
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -XXX,XX +XXX,XX @@ struct kvm_run;
 struct hax_vcpu_state;
 struct hvf_vcpu_state;
 
-#define TB_JMP_CACHE_BITS 12
-#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
-
 /* work queue */
 
 /* The union type allows passing of 64 bit target pointers on 32 bit
@@ -XXX,XX +XXX,XX @@ struct CPUState {
     CPUArchState *env_ptr;
     IcountDecr *icount_decr_ptr;
 
-    /* Accessed in parallel; all accesses must be atomic */
-    TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
+    CPUJumpCache *tb_jmp_cache;
 
     struct GDBRegisterState *gdb_regs;
     int gdb_num_regs;
@@ -XXX,XX +XXX,XX @@ extern CPUTailQ cpus;
 
 extern __thread CPUState *current_cpu;
 
-static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
-{
-    unsigned int i;
-
-    for (i = 0; i < TB_JMP_CACHE_SIZE; i++) {
-        qatomic_set(&cpu->tb_jmp_cache[i], NULL);
-    }
-}
-
 /**
  * qemu_tcg_mttcg_enabled:
  * Check whether we are running MultiThread TCG or not.
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -XXX,XX +XXX,XX @@ typedef struct CoMutex CoMutex;
 typedef struct ConfidentialGuestSupport ConfidentialGuestSupport;
 typedef struct CPUAddressSpace CPUAddressSpace;
 typedef struct CPUArchState CPUArchState;
+typedef struct CPUJumpCache CPUJumpCache;
 typedef struct CPUState CPUState;
 typedef struct CPUTLBEntryFull CPUTLBEntryFull;
 typedef struct DeviceListener DeviceListener;
diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/stubs/tcg-stub.c
+++ b/accel/stubs/tcg-stub.c
@@ -XXX,XX +XXX,XX @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
 {
 }
 
+void tcg_flush_jmp_cache(CPUState *cpu)
+{
+}
+
 int probe_access_flags(CPUArchState *env, target_ulong addr,
                        MMUAccessType access_type, int mmu_idx,
                        bool nonfault, void **phost, uintptr_t retaddr)
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -XXX,XX +XXX,XX @@
 #include "sysemu/replay.h"
 #include "sysemu/tcg.h"
 #include "exec/helper-proto.h"
+#include "tb-jmp-cache.h"
 #include "tb-hash.h"
 #include "tb-context.h"
 #include "internal.h"
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
     tcg_debug_assert(!(cflags & CF_INVALID));
 
     hash = tb_jmp_cache_hash_func(pc);
-    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
+    tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb);
 
     if (likely(tb &&
                tb->pc == pc &&
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
     if (tb == NULL) {
         return NULL;
     }
-    qatomic_set(&cpu->tb_jmp_cache[hash], tb);
+    qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb);
     return tb;
 }
 
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
 
             tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
             if (tb == NULL) {
+                uint32_t h;
+
                 mmap_lock();
                 tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
                 mmap_unlock();
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
                  * We add the TB in the virtual pc hash table
                  * for the fast lookup
                  */
-                qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
+                h = tb_jmp_cache_hash_func(pc);
+                qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb);
             }
 
 #ifndef CONFIG_USER_ONLY
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
 
 static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
 {
-    unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
+    int i, i0 = tb_jmp_cache_hash_page(page_addr);
+    CPUJumpCache *jc = cpu->tb_jmp_cache;
 
     for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
-        qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
+        qatomic_set(&jc->array[i0 + i].tb, NULL);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
 
     qemu_spin_unlock(&env_tlb(env)->c.lock);
 
-    cpu_tb_jmp_cache_clear(cpu);
+    tcg_flush_jmp_cache(cpu);
 
     if (to_clean == ALL_MMUIDX_BITS) {
         qatomic_set(&env_tlb(env)->c.full_flush_count,
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
      * longer to clear each entry individually than it will to clear it all.
      */
     if (d.len >= (TARGET_PAGE_SIZE * TB_JMP_CACHE_SIZE)) {
-        cpu_tb_jmp_cache_clear(cpu);
+        tcg_flush_jmp_cache(cpu);
         return;
     }
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@
 #include "sysemu/tcg.h"
 #include "qapi/error.h"
 #include "hw/core/tcg-cpu-ops.h"
+#include "tb-jmp-cache.h"
 #include "tb-hash.h"
 #include "tb-context.h"
 #include "internal.h"
@@ -XXX,XX +XXX,XX @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
     }
 
     CPU_FOREACH(cpu) {
-        cpu_tb_jmp_cache_clear(cpu);
+        tcg_flush_jmp_cache(cpu);
     }
 
     qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
     /* remove the TB from the hash list */
     h = tb_jmp_cache_hash_func(tb->pc);
     CPU_FOREACH(cpu) {
-        if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
-            qatomic_set(&cpu->tb_jmp_cache[h], NULL);
+        CPUJumpCache *jc = cpu->tb_jmp_cache;
+        if (qatomic_read(&jc->array[h].tb) == tb) {
+            qatomic_set(&jc->array[h].tb, NULL);
         }
     }
 
@@ -XXX,XX +XXX,XX @@ int page_unprotect(target_ulong address, uintptr_t pc)
 }
 #endif /* CONFIG_USER_ONLY */
 
+/*
+ * Called by generic code at e.g. cpu reset after cpu creation,
+ * therefore we must be prepared to allocate the jump cache.
+ */
+void tcg_flush_jmp_cache(CPUState *cpu)
+{
+    CPUJumpCache *jc = cpu->tb_jmp_cache;
+
+    if (likely(jc)) {
+        for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
+            qatomic_set(&jc->array[i].tb, NULL);
+        }
+    } else {
+        /* This should happen once during realize, and thus never race. */
+        jc = g_new0(CPUJumpCache, 1);
+        jc = qatomic_xchg(&cpu->tb_jmp_cache, jc);
+        assert(jc == NULL);
+    }
+}
+
 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
 void tcg_flush_softmmu_tlb(CPUState *cs)
 {
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -XXX,XX +XXX,XX @@ static void cpu_common_reset(DeviceState *dev)
     cpu->cflags_next_tb = -1;
 
     if (tcg_enabled()) {
-        cpu_tb_jmp_cache_clear(cpu);
-
+        tcg_flush_jmp_cache(cpu);
         tcg_flush_softmmu_tlb(cpu);
     }
 }
diff --git a/plugins/core.c b/plugins/core.c
index XXXXXXX..XXXXXXX 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -XXX,XX +XXX,XX @@ struct qemu_plugin_ctx *plugin_id_to_ctx_locked(qemu_plugin_id_t id)
 static void plugin_cpu_update__async(CPUState *cpu, run_on_cpu_data data)
 {
     bitmap_copy(cpu->plugin_mask, &data.host_ulong, QEMU_PLUGIN_EV_MAX);
-    cpu_tb_jmp_cache_clear(cpu);
+    tcg_flush_jmp_cache(cpu);
 }
 
 static void plugin_cpu_update__locked(gpointer k, gpointer v, gpointer udata)
diff --git a/trace/control-target.c b/trace/control-target.c
index XXXXXXX..XXXXXXX 100644
--- a/trace/control-target.c
+++ b/trace/control-target.c
@@ -XXX,XX +XXX,XX @@ static void trace_event_synchronize_vcpu_state_dynamic(
 {
     bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed,
                 CPU_TRACE_DSTATE_MAX_EVENTS);
-    cpu_tb_jmp_cache_clear(vcpu);
+    tcg_flush_jmp_cache(vcpu);
 }
 
 void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
-- 
2.34.1

Populate the new CPUClass.get_pc method for all targets.  Always match
the result that would be given by cpu_get_tb_cpu_state,
as we will want these values to correspond in the logs.

Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (target/sparc)
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
Cc: Eduardo Habkost <eduardo@habkost.net> (supporter:Machine core)
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> (supporter:Machine core)
Cc: "Philippe Mathieu-Daudé" <f4bug@amsat.org> (reviewer:Machine core)
Cc: Yanan Wang <wangyanan55@huawei.com> (reviewer:Machine core)
Cc: Michael Rolnik <mrolnik@gmail.com> (maintainer:AVR TCG CPUs)
Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com> (maintainer:CRIS TCG CPUs)
Cc: Taylor Simpson <tsimpson@quicinc.com> (supporter:Hexagon TCG CPUs)
Cc: Song Gao <gaosong@loongson.cn> (maintainer:LoongArch TCG CPUs)
Cc: Xiaojuan Yang <yangxiaojuan@loongson.cn> (maintainer:LoongArch TCG CPUs)
Cc: Laurent Vivier <laurent@vivier.eu> (maintainer:M68K TCG CPUs)
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com> (reviewer:MIPS TCG CPUs)
Cc: Aleksandar Rikalo <aleksandar.rikalo@syrmia.com> (reviewer:MIPS TCG CPUs)
Cc: Chris Wulff <crwulff@gmail.com> (maintainer:NiosII TCG CPUs)
Cc: Marek Vasut <marex@denx.de> (maintainer:NiosII TCG CPUs)
Cc: Stafford Horne <shorne@gmail.com> (odd fixer:OpenRISC TCG CPUs)
Cc: Yoshinori Sato <ysato@users.sourceforge.jp> (reviewer:RENESAS RX CPUs)
Cc: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk> (maintainer:SPARC TCG CPUs)
Cc: Bastian Koppelmann <kbastian@mail.uni-paderborn.de> (maintainer:TriCore TCG CPUs)
Cc: Max Filippov <jcmvbkbc@gmail.com> (maintainer:Xtensa TCG CPUs)
Cc: qemu-arm@nongnu.org (open list:ARM TCG CPUs)
Cc: qemu-ppc@nongnu.org (open list:PowerPC TCG CPUs)
Cc: qemu-riscv@nongnu.org (open list:RISC-V TCG CPUs)
Cc: qemu-s390x@nongnu.org (open list:S390 TCG CPUs)
---
 include/hw/core/cpu.h   |  3 +++
 target/alpha/cpu.c      |  9 +++++++++
 target/arm/cpu.c        | 13 +++++++++++++
 target/avr/cpu.c        |  8 ++++++++
 target/cris/cpu.c       |  8 ++++++++
 target/hexagon/cpu.c    |  8 ++++++++
 target/hppa/cpu.c       |  8 ++++++++
 target/i386/cpu.c       |  9 +++++++++
 target/loongarch/cpu.c  |  9 +++++++++
 target/m68k/cpu.c       |  8 ++++++++
 target/microblaze/cpu.c |  8 ++++++++
 target/mips/cpu.c       |  8 ++++++++
 target/nios2/cpu.c      |  9 +++++++++
 target/openrisc/cpu.c   |  8 ++++++++
 target/ppc/cpu_init.c   |  8 ++++++++
 target/riscv/cpu.c      | 13 +++++++++++++
 target/rx/cpu.c         |  8 ++++++++
 target/s390x/cpu.c      |  8 ++++++++
 target/sh4/cpu.c        |  8 ++++++++
 target/sparc/cpu.c      |  8 ++++++++
 target/tricore/cpu.c    |  9 +++++++++
 target/xtensa/cpu.c     |  8 ++++++++
 22 files changed, 186 insertions(+)

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -XXX,XX +XXX,XX @@ struct SysemuCPUOps;
  *       If the target behaviour here is anything other than "set
  *       the PC register to the value passed in" then the target must
  *       also implement the synchronize_from_tb hook.
+ * @get_pc: Callback for getting the Program Counter register.
+ *       As above, with the semantics of the target architecture.
  * @gdb_read_register: Callback for letting GDB read a register.
  * @gdb_write_register: Callback for letting GDB write a register.
  * @gdb_adjust_breakpoint: Callback for adjusting the address of a
@@ -XXX,XX +XXX,XX @@ struct CPUClass {
     void (*dump_state)(CPUState *cpu, FILE *, int flags);
     int64_t (*get_arch_id)(CPUState *cpu);
     void (*set_pc)(CPUState *cpu, vaddr value);
+    vaddr (*get_pc)(CPUState *cpu);
     int (*gdb_read_register)(CPUState *cpu, GByteArray *buf, int reg);
     int (*gdb_write_register)(CPUState *cpu, uint8_t *buf, int reg);
     vaddr (*gdb_adjust_breakpoint)(CPUState *cpu, vaddr addr);
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.pc = value;
 }
 
+static vaddr alpha_cpu_get_pc(CPUState *cs)
+{
+    AlphaCPU *cpu = ALPHA_CPU(cs);
+
+    return cpu->env.pc;
+}
+
+
 static bool alpha_cpu_has_work(CPUState *cs)
 {
     /* Here we are checking to see if the CPU should wake up from HALT.
@@ -XXX,XX +XXX,XX @@ static void alpha_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = alpha_cpu_has_work;
     cc->dump_state = alpha_cpu_dump_state;
     cc->set_pc = alpha_cpu_set_pc;
+    cc->get_pc = alpha_cpu_get_pc;
     cc->gdb_read_register = alpha_cpu_gdb_read_register;
     cc->gdb_write_register = alpha_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_set_pc(CPUState *cs, vaddr value)
     }
 }
 
+static vaddr arm_cpu_get_pc(CPUState *cs)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+
+    if (is_a64(env)) {
+        return env->pc;
+    } else {
+        return env->regs[15];
+    }
+}
+
 #ifdef CONFIG_TCG
 void arm_cpu_synchronize_from_tb(CPUState *cs,
                                  const TranslationBlock *tb)
@@ -XXX,XX +XXX,XX @@ static void arm_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = arm_cpu_has_work;
     cc->dump_state = arm_cpu_dump_state;
     cc->set_pc = arm_cpu_set_pc;
+    cc->get_pc = arm_cpu_get_pc;
     cc->gdb_read_register = arm_cpu_gdb_read_register;
     cc->gdb_write_register = arm_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.pc_w = value / 2; /* internally PC points to words */
 }
 
+static vaddr avr_cpu_get_pc(CPUState *cs)
+{
+    AVRCPU *cpu = AVR_CPU(cs);
+
+    return cpu->env.pc_w * 2;
+}
+
 static bool avr_cpu_has_work(CPUState *cs)
 {
     AVRCPU *cpu = AVR_CPU(cs);
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = avr_cpu_has_work;
     cc->dump_state = avr_cpu_dump_state;
     cc->set_pc = avr_cpu_set_pc;
+    cc->get_pc = avr_cpu_get_pc;
     dc->vmsd = &vms_avr_cpu;
     cc->sysemu_ops = &avr_sysemu_ops;
     cc->disas_set_info = avr_cpu_disas_set_info;
diff --git a/target/cris/cpu.c b/target/cris/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/cris/cpu.c
+++ b/target/cris/cpu.c
@@ -XXX,XX +XXX,XX @@ static void cris_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.pc = value;
 }
 
+static vaddr cris_cpu_get_pc(CPUState *cs)
+{
+    CRISCPU *cpu = CRIS_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static bool cris_cpu_has_work(CPUState *cs)
 {
     return cs->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI);
@@ -XXX,XX +XXX,XX @@ static void cris_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = cris_cpu_has_work;
     cc->dump_state = cris_cpu_dump_state;
     cc->set_pc = cris_cpu_set_pc;
+    cc->get_pc = cris_cpu_get_pc;
     cc->gdb_read_register = cris_cpu_gdb_read_register;
     cc->gdb_write_register = cris_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_set_pc(CPUState *cs, vaddr value)
     env->gpr[HEX_REG_PC] = value;
 }
 
+static vaddr hexagon_cpu_get_pc(CPUState *cs)
+{
+    HexagonCPU *cpu = HEXAGON_CPU(cs);
+    CPUHexagonState *env = &cpu->env;
+    return env->gpr[HEX_REG_PC];
+}
+
 static void hexagon_cpu_synchronize_from_tb(CPUState *cs,
                                             const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_class_init(ObjectClass *c, void *data)
     cc->has_work = hexagon_cpu_has_work;
     cc->dump_state = hexagon_dump_state;
     cc->set_pc = hexagon_cpu_set_pc;
+    cc->get_pc = hexagon_cpu_get_pc;
     cc->gdb_read_register = hexagon_gdb_read_register;
     cc->gdb_write_register = hexagon_gdb_write_register;
     cc->gdb_num_core_regs = TOTAL_PER_THREAD_REGS + NUM_VREGS + NUM_QREGS;
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.iaoq_b = value + 4;
 }
 
+static vaddr hppa_cpu_get_pc(CPUState *cs)
+{
+    HPPACPU *cpu = HPPA_CPU(cs);
+
+    return cpu->env.iaoq_f;
+}
+
 static void hppa_cpu_synchronize_from_tb(CPUState *cs,
                                          const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = hppa_cpu_has_work;
     cc->dump_state = hppa_cpu_dump_state;
     cc->set_pc = hppa_cpu_set_pc;
+    cc->get_pc = hppa_cpu_get_pc;
     cc->gdb_read_register = hppa_cpu_gdb_read_register;
     cc->gdb_write_register = hppa_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.eip = value;
 }
 
+static vaddr x86_cpu_get_pc(CPUState *cs)
+{
+    X86CPU *cpu = X86_CPU(cs);
+
+    /* Match cpu_get_tb_cpu_state. */
+    return cpu->env.eip + cpu->env.segs[R_CS].base;
+}
+
 int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request)
 {
     X86CPU *cpu = X86_CPU(cs);
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_common_class_init(ObjectClass *oc, void *data)
     cc->has_work = x86_cpu_has_work;
     cc->dump_state = x86_cpu_dump_state;
     cc->set_pc = x86_cpu_set_pc;
+    cc->get_pc = x86_cpu_get_pc;
     cc->gdb_read_register = x86_cpu_gdb_read_register;
     cc->gdb_write_register = x86_cpu_gdb_write_register;
     cc->get_arch_id = x86_cpu_get_arch_id;
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_set_pc(CPUState *cs, vaddr value)
     env->pc = value;
 }
 
+static vaddr loongarch_cpu_get_pc(CPUState *cs)
+{
+    LoongArchCPU *cpu = LOONGARCH_CPU(cs);
+    CPULoongArchState *env = &cpu->env;
+
+    return env->pc;
+}
+
 #ifndef CONFIG_USER_ONLY
 #include "hw/loongarch/virt.h"
 
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_class_init(ObjectClass *c, void *data)
     cc->has_work = loongarch_cpu_has_work;
     cc->dump_state = loongarch_cpu_dump_state;
     cc->set_pc = loongarch_cpu_set_pc;
+    cc->get_pc = loongarch_cpu_get_pc;
 #ifndef CONFIG_USER_ONLY
     dc->vmsd = &vmstate_loongarch_cpu;
     cc->sysemu_ops = &loongarch_sysemu_ops;
diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/m68k/cpu.c
+++ b/target/m68k/cpu.c
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.pc = value;
 }
 
+static vaddr m68k_cpu_get_pc(CPUState *cs)
+{
+    M68kCPU *cpu = M68K_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static bool m68k_cpu_has_work(CPUState *cs)
 {
     return cs->interrupt_request & CPU_INTERRUPT_HARD;
@@ -XXX,XX +XXX,XX @@ static void m68k_cpu_class_init(ObjectClass *c, void *data)
     cc->has_work = m68k_cpu_has_work;
     cc->dump_state = m68k_cpu_dump_state;
     cc->set_pc = m68k_cpu_set_pc;
+    cc->get_pc = m68k_cpu_get_pc;
     cc->gdb_read_register = m68k_cpu_gdb_read_register;
     cc->gdb_write_register = m68k_cpu_gdb_write_register;
 #if defined(CONFIG_SOFTMMU)
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.iflags = 0;
 }
 
+static vaddr mb_cpu_get_pc(CPUState *cs)
+{
+    MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static void mb_cpu_synchronize_from_tb(CPUState *cs,
                                        const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_class_init(ObjectClass *oc, void *data)
 
     cc->dump_state = mb_cpu_dump_state;
     cc->set_pc = mb_cpu_set_pc;
+    cc->get_pc = mb_cpu_get_pc;
     cc->gdb_read_register = mb_cpu_gdb_read_register;
     cc->gdb_write_register = mb_cpu_gdb_write_register;
 
diff --git a/target/mips/cpu.c b/target/mips/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -XXX,XX +XXX,XX @@ static void mips_cpu_set_pc(CPUState *cs, vaddr value)
     mips_env_set_pc(&cpu->env, value);
 }
 
+static vaddr mips_cpu_get_pc(CPUState *cs)
+{
+    MIPSCPU *cpu = MIPS_CPU(cs);
+
+    return cpu->env.active_tc.PC;
+}
+
 static bool mips_cpu_has_work(CPUState *cs)
 {
     MIPSCPU *cpu = MIPS_CPU(cs);
@@ -XXX,XX +XXX,XX @@ static void mips_cpu_class_init(ObjectClass *c, void *data)
     cc->has_work = mips_cpu_has_work;
     cc->dump_state = mips_cpu_dump_state;
     cc->set_pc = mips_cpu_set_pc;
+    cc->get_pc = mips_cpu_get_pc;
     cc->gdb_read_register = mips_cpu_gdb_read_register;
     cc->gdb_write_register = mips_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/nios2/cpu.c b/target/nios2/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/nios2/cpu.c
+++ b/target/nios2/cpu.c
@@ -XXX,XX +XXX,XX @@ static void nios2_cpu_set_pc(CPUState *cs, vaddr value)
     env->pc = value;
 }
 
+static vaddr nios2_cpu_get_pc(CPUState *cs)
+{
+    Nios2CPU *cpu = NIOS2_CPU(cs);
+    CPUNios2State *env = &cpu->env;
+
+    return env->pc;
+}
+
 static bool nios2_cpu_has_work(CPUState *cs)
 {
     return cs->interrupt_request & CPU_INTERRUPT_HARD;
@@ -XXX,XX +XXX,XX @@ static void nios2_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = nios2_cpu_has_work;
     cc->dump_state = nios2_cpu_dump_state;
     cc->set_pc = nios2_cpu_set_pc;
+    cc->get_pc = nios2_cpu_get_pc;
     cc->disas_set_info = nios2_cpu_disas_set_info;
 #ifndef CONFIG_USER_ONLY
     cc->sysemu_ops = &nios2_sysemu_ops;
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.dflag = 0;
 }
 
+static vaddr openrisc_cpu_get_pc(CPUState *cs)
+{
+    OpenRISCCPU *cpu = OPENRISC_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static void openrisc_cpu_synchronize_from_tb(CPUState *cs,
                                              const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = openrisc_cpu_has_work;
     cc->dump_state = openrisc_cpu_dump_state;
     cc->set_pc = openrisc_cpu_set_pc;
+    cc->get_pc = openrisc_cpu_get_pc;
     cc->gdb_read_register = openrisc_cpu_gdb_read_register;
     cc->gdb_write_register = openrisc_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.nip = value;
 }
 
+static vaddr ppc_cpu_get_pc(CPUState *cs)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+    return cpu->env.nip;
+}
+
 static bool ppc_cpu_has_work(CPUState *cs)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -XXX,XX +XXX,XX @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = ppc_cpu_has_work;
     cc->dump_state = ppc_cpu_dump_state;
     cc->set_pc = ppc_cpu_set_pc;
+    cc->get_pc = ppc_cpu_get_pc;
     cc->gdb_read_register = ppc_cpu_gdb_read_register;
     cc->gdb_write_register = ppc_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_set_pc(CPUState *cs, vaddr value)
     }
 }
 
+static vaddr riscv_cpu_get_pc(CPUState *cs)
+{
+    RISCVCPU *cpu = RISCV_CPU(cs);
+    CPURISCVState *env = &cpu->env;
+
+    /* Match cpu_get_tb_cpu_state. */
+    if (env->xl == MXL_RV32) {
+        return env->pc & UINT32_MAX;
+    }
+    return env->pc;
+}
+
 static void riscv_cpu_synchronize_from_tb(CPUState *cs,
                                           const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_class_init(ObjectClass *c, void *data)
     cc->has_work = riscv_cpu_has_work;
     cc->dump_state = riscv_cpu_dump_state;
     cc->set_pc = riscv_cpu_set_pc;
+    cc->get_pc = riscv_cpu_get_pc;
     cc->gdb_read_register = riscv_cpu_gdb_read_register;
     cc->gdb_write_register = riscv_cpu_gdb_write_register;
     cc->gdb_num_core_regs = 33;
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.pc = value;
 }
 
+static vaddr rx_cpu_get_pc(CPUState *cs)
+{
+    RXCPU *cpu = RX_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static void rx_cpu_synchronize_from_tb(CPUState *cs,
                                        const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_class_init(ObjectClass *klass, void *data)
     cc->has_work = rx_cpu_has_work;
     cc->dump_state = rx_cpu_dump_state;
     cc->set_pc = rx_cpu_set_pc;
+    cc->get_pc = rx_cpu_get_pc;
 
 #ifndef CONFIG_USER_ONLY
     cc->sysemu_ops = &rx_sysemu_ops;
diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/s390x/cpu.c
+++ b/target/s390x/cpu.c
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.psw.addr = value;
 }
 
+static vaddr s390_cpu_get_pc(CPUState *cs)
+{
+    S390CPU *cpu = S390_CPU(cs);
+
+    return cpu->env.psw.addr;
+}
+
 static bool s390_cpu_has_work(CPUState *cs)
 {
     S390CPU *cpu = S390_CPU(cs);
@@ -XXX,XX +XXX,XX @@ static void s390_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = s390_cpu_has_work;
     cc->dump_state = s390_cpu_dump_state;
     cc->set_pc = s390_cpu_set_pc;
+    cc->get_pc = s390_cpu_get_pc;
     cc->gdb_read_register = s390_cpu_gdb_read_register;
     cc->gdb_write_register = s390_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.pc = value;
 }
 
+static vaddr superh_cpu_get_pc(CPUState *cs)
+{
+    SuperHCPU *cpu = SUPERH_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static void superh_cpu_synchronize_from_tb(CPUState *cs,
                                            const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = superh_cpu_has_work;
     cc->dump_state = superh_cpu_dump_state;
     cc->set_pc = superh_cpu_set_pc;
+    cc->get_pc = superh_cpu_get_pc;
     cc->gdb_read_register = superh_cpu_gdb_read_register;
     cc->gdb_write_register = superh_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.npc = value + 4;
 }
 
+static vaddr sparc_cpu_get_pc(CPUState *cs)
+{
+    SPARCCPU *cpu = SPARC_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static void sparc_cpu_synchronize_from_tb(CPUState *cs,
                                           const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_class_init(ObjectClass *oc, void *data)
     cc->memory_rw_debug = sparc_cpu_memory_rw_debug;
 #endif
     cc->set_pc = sparc_cpu_set_pc;
+    cc->get_pc = sparc_cpu_get_pc;
     cc->gdb_read_register = sparc_cpu_gdb_read_register;
     cc->gdb_write_register = sparc_cpu_gdb_write_register;
 #ifndef CONFIG_USER_ONLY
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/cpu.c
+++ b/target/tricore/cpu.c
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_set_pc(CPUState *cs, vaddr value)
     env->PC = value & ~(target_ulong)1;
 }
 
+static vaddr tricore_cpu_get_pc(CPUState *cs)
+{
+    TriCoreCPU *cpu = TRICORE_CPU(cs);
+    CPUTriCoreState *env = &cpu->env;
+
+    return env->PC;
+}
+
 static void tricore_cpu_synchronize_from_tb(CPUState *cs,
                                             const TranslationBlock *tb)
 {
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_class_init(ObjectClass *c, void *data)
 
     cc->dump_state = tricore_cpu_dump_state;
     cc->set_pc = tricore_cpu_set_pc;
+    cc->get_pc = tricore_cpu_get_pc;
     cc->sysemu_ops = &tricore_sysemu_ops;
     cc->tcg_ops = &tricore_tcg_ops;
 }
diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/xtensa/cpu.c
+++ b/target/xtensa/cpu.c
@@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_set_pc(CPUState *cs, vaddr value)
     cpu->env.pc = value;
 }
 
+static vaddr xtensa_cpu_get_pc(CPUState *cs)
+{
+    XtensaCPU *cpu = XTENSA_CPU(cs);
+
+    return cpu->env.pc;
+}
+
 static bool xtensa_cpu_has_work(CPUState *cs)
 {
 #ifndef CONFIG_USER_ONLY
@@ -XXX,XX +XXX,XX @@ static void xtensa_cpu_class_init(ObjectClass *oc, void *data)
     cc->has_work = xtensa_cpu_has_work;
     cc->dump_state = xtensa_cpu_dump_state;
     cc->set_pc = xtensa_cpu_set_pc;
+    cc->get_pc = xtensa_cpu_get_pc;
     cc->gdb_read_register = xtensa_cpu_gdb_read_register;
     cc->gdb_write_register = xtensa_cpu_gdb_write_register;
     cc->gdb_stop_before_watchpoint = true;
-- 
2.34.1

The availability of tb->pc will shortly be conditional on
!TARGET_TB_PCREL.  Introduce the accessor functions tb_pc()
and log_pc() to minimize ifdefs.

Pass around a known pc to places like tcg_gen_code,
where the caller must already have the value.

diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -XXX,XX +XXX,XX @@ G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
 void page_init(void);
 void tb_htable_init(void);
 
+/* Return the current PC from CPU, which may be cached in TB. */
+static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
+{
+    return tb_pc(tb);
+}
+
 #endif /* ACCEL_TCG_INTERNAL_H */
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -XXX,XX +XXX,XX @@ struct TranslationBlock {
     uintptr_t jmp_dest[2];
 };
 
+/* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */
+static inline target_ulong tb_pc(const TranslationBlock *tb)
+{
+    return tb->pc;
+}
+
 /* Hide the qatomic_read to make code a little easier on the eyes */
 static inline uint32_t tb_cflags(const TranslationBlock *tb)
 {
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ void tcg_register_thread(void);
 void tcg_prologue_init(TCGContext *s);
 void tcg_func_start(TCGContext *s);
 
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb);
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start);
 
 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size);
 
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
     const TranslationBlock *tb = p;
     const struct tb_desc *desc = d;
 
-    if (tb->pc == desc->pc &&
+    if (tb_pc(tb) == desc->pc &&
         tb->page_addr[0] == desc->page_addr0 &&
         tb->cs_base == desc->cs_base &&
         tb->flags == desc->flags &&
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
     return tb;
 }
 
-static inline void log_cpu_exec(target_ulong pc, CPUState *cpu,
-                                const TranslationBlock *tb)
+static void log_cpu_exec(target_ulong pc, CPUState *cpu,
+                         const TranslationBlock *tb)
 {
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC))
-        && qemu_log_in_addr_range(pc)) {
-
+    if (qemu_log_in_addr_range(pc)) {
         qemu_log_mask(CPU_LOG_EXEC,
                       "Trace %d: %p [" TARGET_FMT_lx
                       "/" TARGET_FMT_lx "/%08x/%08x] %s\n",
@@ -XXX,XX +XXX,XX @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
         return tcg_code_gen_epilogue;
     }
 
-    log_cpu_exec(pc, cpu, tb);
+    if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
+        log_cpu_exec(pc, cpu, tb);
+    }
 
     return tb->tc.ptr;
 }
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
     TranslationBlock *last_tb;
     const void *tb_ptr = itb->tc.ptr;
 
-    log_cpu_exec(itb->pc, cpu, itb);
+    if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
+        log_cpu_exec(log_pc(cpu, itb), cpu, itb);
+    }
 
     qemu_thread_jit_execute();
     ret = tcg_qemu_tb_exec(env, tb_ptr);
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
          * of the start of the TB.
          */
         CPUClass *cc = CPU_GET_CLASS(cpu);
-        qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc,
-                               "Stopped execution of TB chain before %p ["
-                               TARGET_FMT_lx "] %s\n",
-                               last_tb->tc.ptr, last_tb->pc,
-                               lookup_symbol(last_tb->pc));
+
         if (cc->tcg_ops->synchronize_from_tb) {
             cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
         } else {
             assert(cc->set_pc);
-            cc->set_pc(cpu, last_tb->pc);
+            cc->set_pc(cpu, tb_pc(last_tb));
+        }
+        if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
+            target_ulong pc = log_pc(cpu, last_tb);
+            if (qemu_log_in_addr_range(pc)) {
+                qemu_log("Stopped execution of TB chain before %p ["
+                         TARGET_FMT_lx "] %s\n",
+                         last_tb->tc.ptr, pc, lookup_symbol(pc));
+            }
         }
     }
 
@@ -XXX,XX +XXX,XX @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
 
     qemu_spin_unlock(&tb_next->jmp_lock);
 
-    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
-                           "Linking TBs %p [" TARGET_FMT_lx
-                           "] index %d -> %p [" TARGET_FMT_lx "]\n",
-                           tb->tc.ptr, tb->pc, n,
-                           tb_next->tc.ptr, tb_next->pc);
+    qemu_log_mask(CPU_LOG_EXEC, "Linking TBs %p index %d -> %p\n",
+                  tb->tc.ptr, n, tb_next->tc.ptr);
     return;
 
  out_unlock_next:
@@ -XXX,XX +XXX,XX @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
 }
 
 static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
+                                    target_ulong pc,
                                     TranslationBlock **last_tb, int *tb_exit)
 {
     int32_t insns_left;
 
-    trace_exec_tb(tb, tb->pc);
+    trace_exec_tb(tb, pc);
     tb = cpu_tb_exec(cpu, tb, tb_exit);
     if (*tb_exit != TB_EXIT_REQUESTED) {
         *last_tb = tb;
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
                 tb_add_jump(last_tb, tb_exit, tb);
             }
 
-            cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit);
+            cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit);
 
             /* Try to align the host and virtual clocks
                if the guest is in advance */
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
 
         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
             if (i == 0) {
-                prev = (j == 0 ? tb->pc : 0);
+                prev = (j == 0 ? tb_pc(tb) : 0);
             } else {
                 prev = tcg_ctx->gen_insn_data[i - 1][j];
             }
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
 static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                      uintptr_t searched_pc, bool reset_icount)
 {
-    target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
+    target_ulong data[TARGET_INSN_START_WORDS] = { tb_pc(tb) };
     uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
     CPUArchState *env = cpu->env_ptr;
     const uint8_t *p = tb->tc.ptr + tb->tc.size;
@@ -XXX,XX +XXX,XX @@ static bool tb_cmp(const void *ap, const void *bp)
     const TranslationBlock *a = ap;
     const TranslationBlock *b = bp;
 
-    return a->pc == b->pc &&
+    return tb_pc(a) == tb_pc(b) &&
         a->cs_base == b->cs_base &&
         a->flags == b->flags &&
         (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
@@ -XXX,XX +XXX,XX @@ static void do_tb_invalidate_check(void *p, uint32_t hash, void *userp)
     TranslationBlock *tb = p;
     target_ulong addr = *(target_ulong *)userp;
 
-    if (!(addr + TARGET_PAGE_SIZE <= tb->pc || addr >= tb->pc + tb->size)) {
+    if (!(addr + TARGET_PAGE_SIZE <= tb_pc(tb) ||
+          addr >= tb_pc(tb) + tb->size)) {
         printf("ERROR invalidate: address=" TARGET_FMT_lx
-               " PC=%08lx size=%04x\n", addr, (long)tb->pc, tb->size);
+               " PC=%08lx size=%04x\n", addr, (long)tb_pc(tb), tb->size);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void do_tb_page_check(void *p, uint32_t hash, void *userp)
     TranslationBlock *tb = p;
     int flags1, flags2;
 
-    flags1 = page_get_flags(tb->pc);
-    flags2 = page_get_flags(tb->pc + tb->size - 1);
+    flags1 = page_get_flags(tb_pc(tb));
+    flags2 = page_get_flags(tb_pc(tb) + tb->size - 1);
     if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
         printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
-               (long)tb->pc, tb->size, flags1, flags2);
+               (long)tb_pc(tb), tb->size, flags1, flags2);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
 
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0];
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags, orig_cflags,
+    h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, orig_cflags,
                      tb->trace_vcpu_dstate);
     if (!qht_remove(&tb_ctx.htable, tb, h)) {
         return;
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     }
 
     /* add in the hash table */
-    h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags,
+    h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, tb->cflags,
                      tb->trace_vcpu_dstate);
     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
 
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     tcg_ctx->cpu = NULL;
     max_insns = tb->icount;
 
-    trace_translate_block(tb, tb->pc, tb->tc.ptr);
+    trace_translate_block(tb, pc, tb->tc.ptr);
 
     /* generate machine code */
     tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID;
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
     ti = profile_getclock();
 #endif
 
-    gen_code_size = tcg_gen_code(tcg_ctx, tb);
+    gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
     if (unlikely(gen_code_size < 0)) {
  error_return:
         switch (gen_code_size) {
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
-        qemu_log_in_addr_range(tb->pc)) {
+        qemu_log_in_addr_range(pc)) {
         FILE *logfile = qemu_log_trylock();
         if (logfile) {
             int code_size, data_size;
@@ -XXX,XX +XXX,XX @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
      */
     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
 
-    qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
-                           "cpu_io_recompile: rewound execution of TB to "
-                           TARGET_FMT_lx "\n", tb->pc);
+    if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
+        target_ulong pc = log_pc(cpu, tb);
+        if (qemu_log_in_addr_range(pc)) {
+            qemu_log("cpu_io_recompile: rewound execution of TB to "
+                     TARGET_FMT_lx "\n", pc);
+        }
+    }
 
     cpu_loop_exit_noexc(cpu);
 }
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -XXX,XX +XXX,XX @@ void arm_cpu_synchronize_from_tb(CPUState *cs,
      * never possible for an AArch64 TB to chain to an AArch32 TB.
      */
     if (is_a64(env)) {
-        env->pc = tb->pc;
+        env->pc = tb_pc(tb);
     } else {
-        env->regs[15] = tb->pc;
+        env->regs[15] = tb_pc(tb);
     }
 }
 #endif /* CONFIG_TCG */
diff --git a/target/avr/cpu.c b/target/avr/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/avr/cpu.c
+++ b/target/avr/cpu.c
@@ -XXX,XX +XXX,XX @@ static void avr_cpu_synchronize_from_tb(CPUState *cs,
     AVRCPU *cpu = AVR_CPU(cs);
     CPUAVRState *env = &cpu->env;
 
-    env->pc_w = tb->pc / 2; /* internally PC points to words */
+    env->pc_w = tb_pc(tb) / 2; /* internally PC points to words */
 }
 
 static void avr_cpu_reset(DeviceState *ds)
diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hexagon/cpu.c
+++ b/target/hexagon/cpu.c
@@ -XXX,XX +XXX,XX @@ static void hexagon_cpu_synchronize_from_tb(CPUState *cs,
 {
     HexagonCPU *cpu = HEXAGON_CPU(cs);
     CPUHexagonState *env = &cpu->env;
-    env->gpr[HEX_REG_PC] = tb->pc;
+    env->gpr[HEX_REG_PC] = tb_pc(tb);
 }
 
 static bool hexagon_cpu_has_work(CPUState *cs)
diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs,
     HPPACPU *cpu = HPPA_CPU(cs);
 
 #ifdef CONFIG_USER_ONLY
-    cpu->env.iaoq_f = tb->pc;
+    cpu->env.iaoq_f = tb_pc(tb);
     cpu->env.iaoq_b = tb->cs_base;
 #else
     /* Recover the IAOQ values from the GVA + PRIV.  */
@@ -XXX,XX +XXX,XX @@ static void hppa_cpu_synchronize_from_tb(CPUState *cs,
     int32_t diff = cs_base;
 
     cpu->env.iasq_f = iasq_f;
-    cpu->env.iaoq_f = (tb->pc & ~iasq_f) + priv;
+    cpu->env.iaoq_f = (tb_pc(tb) & ~iasq_f) + priv;
     if (diff) {
         cpu->env.iaoq_b = cpu->env.iaoq_f + diff;
     }
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -XXX,XX +XXX,XX @@ static void x86_cpu_synchronize_from_tb(CPUState *cs,
 {
     X86CPU *cpu = X86_CPU(cs);
 
-    cpu->env.eip = tb->pc - tb->cs_base;
+    cpu->env.eip = tb_pc(tb) - tb->cs_base;
 }
 
 #ifndef CONFIG_USER_ONLY
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -XXX,XX +XXX,XX @@ static void loongarch_cpu_synchronize_from_tb(CPUState *cs,
     LoongArchCPU *cpu = LOONGARCH_CPU(cs);
     CPULoongArchState *env = &cpu->env;
 
-    env->pc = tb->pc;
+    env->pc = tb_pc(tb);
 }
 #endif /* CONFIG_TCG */
 
diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/microblaze/cpu.c
+++ b/target/microblaze/cpu.c
@@ -XXX,XX +XXX,XX @@ static void mb_cpu_synchronize_from_tb(CPUState *cs,
 {
     MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs);
 
-    cpu->env.pc = tb->pc;
+    cpu->env.pc = tb_pc(tb);
     cpu->env.iflags = tb->flags & IFLAGS_TB_MASK;
 }
 
diff --git a/target/mips/tcg/exception.c b/target/mips/tcg/exception.c
index XXXXXXX..XXXXXXX 100644
--- a/target/mips/tcg/exception.c
+++ b/target/mips/tcg/exception.c
@@ -XXX,XX +XXX,XX @@ void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb)
     MIPSCPU *cpu = MIPS_CPU(cs);
     CPUMIPSState *env = &cpu->env;
 
-    env->active_tc.PC = tb->pc;
+    env->active_tc.PC = tb_pc(tb);
     env->hflags &= ~MIPS_HFLAG_BMASK;
     env->hflags |= tb->flags & MIPS_HFLAG_BMASK;
 }
diff --git a/target/mips/tcg/sysemu/special_helper.c b/target/mips/tcg/sysemu/special_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/mips/tcg/sysemu/special_helper.c
+++ b/target/mips/tcg/sysemu/special_helper.c
@@ -XXX,XX +XXX,XX @@ bool mips_io_recompile_replay_branch(CPUState *cs, const TranslationBlock *tb)
     CPUMIPSState *env = &cpu->env;
 
     if ((env->hflags & MIPS_HFLAG_BMASK) != 0
-        && env->active_tc.PC != tb->pc) {
+        && env->active_tc.PC != tb_pc(tb)) {
         env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
         env->hflags &= ~MIPS_HFLAG_BMASK;
         return true;
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -XXX,XX +XXX,XX @@ static void openrisc_cpu_synchronize_from_tb(CPUState *cs,
 {
     OpenRISCCPU *cpu = OPENRISC_CPU(cs);
 
-    cpu->env.pc = tb->pc;
+    cpu->env.pc = tb_pc(tb);
 }
 
 
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -XXX,XX +XXX,XX @@ static void riscv_cpu_synchronize_from_tb(CPUState *cs,
     RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL);
 
     if (xl == MXL_RV32) {
-        env->pc = (int32_t)tb->pc;
+        env->pc = (int32_t)tb_pc(tb);
     } else {
-        env->pc = tb->pc;
+        env->pc = tb_pc(tb);
     }
 }
 
diff --git a/target/rx/cpu.c b/target/rx/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -XXX,XX +XXX,XX @@ static void rx_cpu_synchronize_from_tb(CPUState *cs,
 {
     RXCPU *cpu = RX_CPU(cs);
 
-    cpu->env.pc = tb->pc;
+    cpu->env.pc = tb_pc(tb);
 }
 
 static bool rx_cpu_has_work(CPUState *cs)
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
 {
     SuperHCPU *cpu = SUPERH_CPU(cs);
 
-    cpu->env.pc = tb->pc;
+    cpu->env.pc = tb_pc(tb);
     cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
     CPUSH4State *env = &cpu->env;
 
     if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
-        && env->pc != tb->pc) {
+        && env->pc != tb_pc(tb)) {
         env->pc -= 2;
         env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
         return true;
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -XXX,XX +XXX,XX @@ static void sparc_cpu_synchronize_from_tb(CPUState *cs,
 {
     SPARCCPU *cpu = SPARC_CPU(cs);
 
-    cpu->env.pc = tb->pc;
+    cpu->env.pc = tb_pc(tb);
     cpu->env.npc = tb->cs_base;
 }
 
diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/tricore/cpu.c
+++ b/target/tricore/cpu.c
@@ -XXX,XX +XXX,XX @@ static void tricore_cpu_synchronize_from_tb(CPUState *cs,
     TriCoreCPU *cpu = TRICORE_CPU(cs);
     CPUTriCoreState *env = &cpu->env;
 
-    env->PC = tb->pc;
+    env->PC = tb_pc(tb);
 }
 
 static void tricore_cpu_reset(DeviceState *dev)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void)
 #endif
 
 
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
 {
 #ifdef CONFIG_PROFILER
     TCGProfile *prof = &s->prof;
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
-                 && qemu_log_in_addr_range(tb->pc))) {
+                 && qemu_log_in_addr_range(pc_start))) {
         FILE *logfile = qemu_log_trylock();
         if (logfile) {
             fprintf(logfile, "OP:\n");
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
     if (s->nb_indirects > 0) {
 #ifdef DEBUG_DISAS
         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
-                     && qemu_log_in_addr_range(tb->pc))) {
+                     && qemu_log_in_addr_range(pc_start))) {
             FILE *logfile = qemu_log_trylock();
             if (logfile) {
                 fprintf(logfile, "OP before indirect lowering:\n");
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
-                 && qemu_log_in_addr_range(tb->pc))) {
+                 && qemu_log_in_addr_range(pc_start))) {
         FILE *logfile = qemu_log_trylock();
         if (logfile) {
             fprintf(logfile, "OP after optimization and liveness analysis:\n");
-- 
2.34.1

Prepare for targets to be able to produce TBs that can
run in more than one virtual address context.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/internal.h      |  4 +++
 accel/tcg/tb-jmp-cache.h  | 41 +++++++++++++++++++++++++
 include/exec/cpu-defs.h   |  3 ++
 include/exec/exec-all.h   | 32 ++++++++++++++++++--
 accel/tcg/cpu-exec.c      | 16 ++++++----
 accel/tcg/translate-all.c | 64 ++++++++++++++++++++++++++-------------
 6 files changed, 131 insertions(+), 29 deletions(-)

diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -XXX,XX +XXX,XX @@ void tb_htable_init(void);
 /* Return the current PC from CPU, which may be cached in TB. */
 static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
 {
+#if TARGET_TB_PCREL
+    return cpu->cc->get_pc(cpu);
+#else
     return tb_pc(tb);
+#endif
 }
 
 #endif /* ACCEL_TCG_INTERNAL_H */
diff --git a/accel/tcg/tb-jmp-cache.h b/accel/tcg/tb-jmp-cache.h
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/tb-jmp-cache.h
+++ b/accel/tcg/tb-jmp-cache.h
@@ -XXX,XX +XXX,XX @@
 
 /*
  * Accessed in parallel; all accesses to 'tb' must be atomic.
+ * For TARGET_TB_PCREL, accesses to 'pc' must be protected by
+ * a load_acquire/store_release to 'tb'.
  */
 struct CPUJumpCache {
     struct {
         TranslationBlock *tb;
+#if TARGET_TB_PCREL
+        target_ulong pc;
+#endif
     } array[TB_JMP_CACHE_SIZE];
 };
 
+static inline TranslationBlock *
+tb_jmp_cache_get_tb(CPUJumpCache *jc, uint32_t hash)
+{
+#if TARGET_TB_PCREL
+    /* Use acquire to ensure current load of pc from jc. */
+    return qatomic_load_acquire(&jc->array[hash].tb);
+#else
+    /* Use rcu_read to ensure current load of pc from *tb. */
+    return qatomic_rcu_read(&jc->array[hash].tb);
+#endif
+}
+
+static inline target_ulong
+tb_jmp_cache_get_pc(CPUJumpCache *jc, uint32_t hash, TranslationBlock *tb)
+{
+#if TARGET_TB_PCREL
+    return jc->array[hash].pc;
+#else
+    return tb_pc(tb);
+#endif
+}
+
+static inline void
+tb_jmp_cache_set(CPUJumpCache *jc, uint32_t hash,
+                 TranslationBlock *tb, target_ulong pc)
+{
+#if TARGET_TB_PCREL
+    jc->array[hash].pc = pc;
+    /* Use store_release on tb to ensure pc is written first. */
+    qatomic_store_release(&jc->array[hash].tb, tb);
+#else
+    /* Use the pc value already stored in tb->pc. */
+    qatomic_set(&jc->array[hash].tb, tb);
+#endif
+}
+
 #endif /* ACCEL_TCG_TB_JMP_CACHE_H */
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -XXX,XX +XXX,XX @@
 #  error TARGET_PAGE_BITS must be defined in cpu-param.h
 # endif
 #endif
+#ifndef TARGET_TB_PCREL
+# define TARGET_TB_PCREL 0
+#endif
 
 #define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
 
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -XXX,XX +XXX,XX @@ struct tb_tc {
 };
 
 struct TranslationBlock {
-    target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
-    target_ulong cs_base; /* CS base for this block */
+#if !TARGET_TB_PCREL
+    /*
+     * Guest PC corresponding to this block.  This must be the true
+     * virtual address.  Therefore e.g. x86 stores EIP + CS_BASE, and
+     * targets like Arm, MIPS, HP-PA, which reuse low bits for ISA or
+     * privilege, must store those bits elsewhere.
+     *
+     * If TARGET_TB_PCREL, the opcodes for the TranslationBlock are
+     * written such that the TB is associated only with the physical
+     * page and may be run in any virtual address context.  In this case,
+     * PC must always be taken from ENV in a target-specific manner.
+     * Unwind information is taken as offsets from the page, to be
+     * deposited into the "current" PC.
+     */
+    target_ulong pc;
+#endif
+
+    /*
+     * Target-specific data associated with the TranslationBlock, e.g.:
+     * x86: the original user, the Code Segment virtual base,
+     * arm: an extension of tb->flags,
+     * s390x: instruction data for EXECUTE,
+     * sparc: the next pc of the instruction queue (for delay slots).
+     */
+    target_ulong cs_base;
+
     uint32_t flags; /* flags defining in which context the code was generated */
     uint32_t cflags;    /* compile flags */
 
@@ -XXX,XX +XXX,XX @@ struct TranslationBlock {
 /* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */
 static inline target_ulong tb_pc(const TranslationBlock *tb)
 {
+#if TARGET_TB_PCREL
+    qemu_build_not_reached();
+#else
     return tb->pc;
+#endif
 }
 
 /* Hide the qatomic_read to make code a little easier on the eyes */
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
     const TranslationBlock *tb = p;
     const struct tb_desc *desc = d;
 
-    if (tb_pc(tb) == desc->pc &&
+    if ((TARGET_TB_PCREL || tb_pc(tb) == desc->pc) &&
         tb->page_addr[0] == desc->page_addr0 &&
         tb->cs_base == desc->cs_base &&
         tb->flags == desc->flags &&
@@ -XXX,XX +XXX,XX @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
         return NULL;
     }
     desc.page_addr0 = phys_pc;
-    h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
+    h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : pc),
+                     flags, cflags, *cpu->trace_dstate);
     return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
 }
 
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
                                           uint32_t flags, uint32_t cflags)
 {
     TranslationBlock *tb;
+    CPUJumpCache *jc;
     uint32_t hash;
 
     /* we should never be trying to look up an INVALID tb */
     tcg_debug_assert(!(cflags & CF_INVALID));
 
     hash = tb_jmp_cache_hash_func(pc);
-    tb = qatomic_rcu_read(&cpu->tb_jmp_cache->array[hash].tb);
+    jc = cpu->tb_jmp_cache;
+    tb = tb_jmp_cache_get_tb(jc, hash);
 
     if (likely(tb &&
-               tb->pc == pc &&
+               tb_jmp_cache_get_pc(jc, hash, tb) == pc &&
                tb->cs_base == cs_base &&
                tb->flags == flags &&
                tb->trace_vcpu_dstate == *cpu->trace_dstate &&
@@ -XXX,XX +XXX,XX @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
     if (tb == NULL) {
         return NULL;
     }
-    qatomic_set(&cpu->tb_jmp_cache->array[hash].tb, tb);
+    tb_jmp_cache_set(jc, hash, tb, pc);
     return tb;
 }
 
@@ -XXX,XX +XXX,XX @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
         if (cc->tcg_ops->synchronize_from_tb) {
             cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
         } else {
+            assert(!TARGET_TB_PCREL);
             assert(cc->set_pc);
             cc->set_pc(cpu, tb_pc(last_tb));
         }
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
                  * for the fast lookup
                  */
                 h = tb_jmp_cache_hash_func(pc);
-                qatomic_set(&cpu->tb_jmp_cache->array[h].tb, tb);
+                tb_jmp_cache_set(cpu->tb_jmp_cache, h, tb, pc);
             }
 
 #ifndef CONFIG_USER_ONLY
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
 
         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
             if (i == 0) {
-                prev = (j == 0 ? tb_pc(tb) : 0);
+                prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
             } else {
                 prev = tcg_ctx->gen_insn_data[i - 1][j];
             }
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
 static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                      uintptr_t searched_pc, bool reset_icount)
 {
-    target_ulong data[TARGET_INSN_START_WORDS] = { tb_pc(tb) };
+    target_ulong data[TARGET_INSN_START_WORDS];
     uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
     CPUArchState *env = cpu->env_ptr;
     const uint8_t *p = tb->tc.ptr + tb->tc.size;
@@ -XXX,XX +XXX,XX @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
         return -1;
     }
 
+    memset(data, 0, sizeof(data));
+    if (!TARGET_TB_PCREL) {
+        data[0] = tb_pc(tb);
+    }
+
     /* Reconstruct the stored insn data while looking for the point at
        which the end of the insn exceeds the searched_pc.  */
     for (i = 0; i < num_insns; ++i) {
@@ -XXX,XX +XXX,XX @@ static bool tb_cmp(const void *ap, const void *bp)
     const TranslationBlock *a = ap;
     const TranslationBlock *b = bp;
 
-    return tb_pc(a) == tb_pc(b) &&
-        a->cs_base == b->cs_base &&
-        a->flags == b->flags &&
-        (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
-        a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
-        a->page_addr[0] == b->page_addr[0] &&
-        a->page_addr[1] == b->page_addr[1];
+    return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) &&
+            a->cs_base == b->cs_base &&
+            a->flags == b->flags &&
+            (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
+            a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
+            a->page_addr[0] == b->page_addr[0] &&
+            a->page_addr[1] == b->page_addr[1]);
 }
 
 void tb_htable_init(void)
@@ -XXX,XX +XXX,XX @@ static inline void tb_jmp_unlink(TranslationBlock *dest)
     qemu_spin_unlock(&dest->jmp_lock);
 }
 
+static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
+{
+    CPUState *cpu;
+
+    if (TARGET_TB_PCREL) {
+        /* A TB may be at any virtual address */
+        CPU_FOREACH(cpu) {
+            tcg_flush_jmp_cache(cpu);
+        }
+    } else {
+        uint32_t h = tb_jmp_cache_hash_func(tb_pc(tb));
+
+        CPU_FOREACH(cpu) {
+            CPUJumpCache *jc = cpu->tb_jmp_cache;
+
+            if (qatomic_read(&jc->array[h].tb) == tb) {
+                qatomic_set(&jc->array[h].tb, NULL);
+            }
+        }
+    }
+}
+
 /*
  * In user-mode, call with mmap_lock held.
  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
@@ -XXX,XX +XXX,XX @@ static inline void tb_jmp_unlink(TranslationBlock *dest)
  */
 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
 {
-    CPUState *cpu;
     PageDesc *p;
     uint32_t h;
     tb_page_addr_t phys_pc;
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
 
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0];
-    h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, orig_cflags,
-                     tb->trace_vcpu_dstate);
+    h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
+                     tb->flags, orig_cflags, tb->trace_vcpu_dstate);
     if (!qht_remove(&tb_ctx.htable, tb, h)) {
         return;
     }
@@ -XXX,XX +XXX,XX @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
     }
 
     /* remove the TB from the hash list */
-    h = tb_jmp_cache_hash_func(tb->pc);
-    CPU_FOREACH(cpu) {
-        CPUJumpCache *jc = cpu->tb_jmp_cache;
-        if (qatomic_read(&jc->array[h].tb) == tb) {
-            qatomic_set(&jc->array[h].tb, NULL);
-        }
-    }
+    tb_jmp_cache_inval_tb(tb);
 
     /* suppress this TB from the two jump lists */
     tb_remove_from_jmp_list(tb, 0);
@@ -XXX,XX +XXX,XX @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     }
 
     /* add in the hash table */
-    h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, tb->cflags,
-                     tb->trace_vcpu_dstate);
+    h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
+                     tb->flags, tb->cflags, tb->trace_vcpu_dstate);
     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
 
     /* remove TB from the page(s) if we couldn't insert it */
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
     gen_code_buf = tcg_ctx->code_gen_ptr;
     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
+#if !TARGET_TB_PCREL
     tb->pc = pc;
+#endif
     tb->cs_base = cs_base;
     tb->flags = flags;
     tb->cflags = cflags;
-- 
2.34.1

From: Leandro Lupori <leandro.lupori@eldorado.org.br>

PowerPC64 processors handle direct branches better than indirect
ones, resulting in fewer stalled cycles and branch misses.

However, PPC's tb_target_set_jmp_target() was only using direct
branches for 16-bit jumps, while PowerPC64's unconditional branch
instructions are able to handle displacements of up to 26 bits.
To take advantage of this, jumps whose displacements need between
17 and 26 bits are now also converted to direct branches.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Leandro Lupori <leandro.lupori@eldorado.org.br>
[rth: Expanded some commentary.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.c.inc | 119 +++++++++++++++++++++++++++++----------
 1 file changed, 88 insertions(+), 31 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
     tcg_out32(s, insn);
 }
 
+static inline uint64_t make_pair(tcg_insn_unit i1, tcg_insn_unit i2)
+{
+    if (HOST_BIG_ENDIAN) {
+        return (uint64_t)i1 << 32 | i2;
+    }
+    return (uint64_t)i2 << 32 | i1;
+}
+
+static inline void ppc64_replace2(uintptr_t rx, uintptr_t rw,
+                                  tcg_insn_unit i0, tcg_insn_unit i1)
+{
+#if TCG_TARGET_REG_BITS == 64
+    qatomic_set((uint64_t *)rw, make_pair(i0, i1));
+    flush_idcache_range(rx, rw, 8);
+#else
+    qemu_build_not_reached();
+#endif
+}
+
+static inline void ppc64_replace4(uintptr_t rx, uintptr_t rw,
+                                  tcg_insn_unit i0, tcg_insn_unit i1,
+                                  tcg_insn_unit i2, tcg_insn_unit i3)
+{
+    uint64_t p[2];
+
+    p[!HOST_BIG_ENDIAN] = make_pair(i0, i1);
+    p[HOST_BIG_ENDIAN] = make_pair(i2, i3);
+
+    /*
+     * There's no convenient way to get the compiler to allocate a pair
+     * of registers at an even index, so copy into r6/r7 and clobber.
+     */
+    asm("mr  %%r6, %1\n\t"
+        "mr  %%r7, %2\n\t"
+        "stq %%r6, %0"
+        : "=Q"(*(__int128 *)rw) : "r"(p[0]), "r"(p[1]) : "r6", "r7");
+    flush_idcache_range(rx, rw, 16);
+}
+
 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx,
                               uintptr_t jmp_rw, uintptr_t addr)
 {
-    if (TCG_TARGET_REG_BITS == 64) {
-        tcg_insn_unit i1, i2;
-        intptr_t tb_diff = addr - tc_ptr;
-        intptr_t br_diff = addr - (jmp_rx + 4);
-        uint64_t pair;
+    tcg_insn_unit i0, i1, i2, i3;
+    intptr_t tb_diff = addr - tc_ptr;
+    intptr_t br_diff = addr - (jmp_rx + 4);
+    intptr_t lo, hi;
 
-        /* This does not exercise the range of the branch, but we do
-           still need to be able to load the new value of TCG_REG_TB.
-           But this does still happen quite often.  */
-        if (tb_diff == (int16_t)tb_diff) {
-            i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
-            i2 = B | (br_diff & 0x3fffffc);
-        } else {
-            intptr_t lo = (int16_t)tb_diff;
-            intptr_t hi = (int32_t)(tb_diff - lo);
-            assert(tb_diff == hi + lo);
-            i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
-            i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
-        }
-#if HOST_BIG_ENDIAN
-        pair = (uint64_t)i1 << 32 | i2;
-#else
-        pair = (uint64_t)i2 << 32 | i1;
-#endif
-
-        /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
-           within qatomic_set that would fail to build a ppc32 host.  */
-        qatomic_set__nocheck((uint64_t *)jmp_rw, pair);
-        flush_idcache_range(jmp_rx, jmp_rw, 8);
-    } else {
+    if (TCG_TARGET_REG_BITS == 32) {
         intptr_t diff = addr - jmp_rx;
         tcg_debug_assert(in_range_b(diff));
         qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc));
         flush_idcache_range(jmp_rx, jmp_rw, 4);
+        return;
     }
+
+    /*
+     * For 16-bit displacements, we can use a single add + branch.
+     * This happens quite often.
+     */
+    if (tb_diff == (int16_t)tb_diff) {
+        i0 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
+        i1 = B | (br_diff & 0x3fffffc);
+        ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
+        return;
+    }
+
+    lo = (int16_t)tb_diff;
+    hi = (int32_t)(tb_diff - lo);
+    assert(tb_diff == hi + lo);
+    i0 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
+    i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
+
+    /*
+     * Without stq from 2.07, we can only update two insns,
+     * and those must be the ones that load the target address.
+     */
+    if (!have_isa_2_07) {
+        ppc64_replace2(jmp_rx, jmp_rw, i0, i1);
+        return;
+    }
+
+    /*
+     * For 26-bit displacements, we can use a direct branch.
+     * Otherwise we still need the indirect branch, which we
+     * must restore after a potential direct branch write.
+     */
+    br_diff -= 4;
+    if (in_range_b(br_diff)) {
+        i2 = B | (br_diff & 0x3fffffc);
+        i3 = NOP;
+    } else {
+        i2 = MTSPR | RS(TCG_REG_TB) | CTR;
+        i3 = BCCTR | BO_ALWAYS;
+    }
+    ppc64_replace4(jmp_rx, jmp_rw, i0, i1, i2, i3);
 }
 
 static void tcg_out_call_int(TCGContext *s, int lk,
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         if (s->tb_jmp_insn_offset) {
             /* Direct jump. */
             if (TCG_TARGET_REG_BITS == 64) {
-                /* Ensure the next insns are 8-byte aligned. */
-                if ((uintptr_t)s->code_ptr & 7) {
+                /* Ensure the next insns are 8 or 16-byte aligned. */
+                while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) {
                     tcg_out32(s, NOP);
                 }
                 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
-- 
2.34.1

The value previously chosen for TB_FLAG_UNALIGN (1 << 4) overlaps
GUSA_MASK, which covers bits 4-12.

Rename all DELAY_SLOT_* and GUSA_* defines to emphasize
that they are included in TB_FLAGS.  Add aliases for the
FPSCR and SR bits that are included in TB_FLAGS, so that
we don't accidentally reassign those bits.

Fixes: 4da06fb3062 ("target/sh4: Implement prctl_unalign_sigbus")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/856
Reviewed-by: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/sh4/cpu.h        | 56 +++++++++++++------------
 linux-user/sh4/signal.c |  6 +--
 target/sh4/cpu.c        |  6 +--
 target/sh4/helper.c     |  6 +--
 target/sh4/translate.c  | 90 ++++++++++++++++++++++-------------------
 5 files changed, 88 insertions(+), 76 deletions(-)

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -XXX,XX +XXX,XX @@
 #define FPSCR_RM_NEAREST       (0 << 0)
 #define FPSCR_RM_ZERO          (1 << 0)
 
-#define DELAY_SLOT_MASK        0x7
-#define DELAY_SLOT             (1 << 0)
-#define DELAY_SLOT_CONDITIONAL (1 << 1)
-#define DELAY_SLOT_RTE         (1 << 2)
+#define TB_FLAG_DELAY_SLOT       (1 << 0)
+#define TB_FLAG_DELAY_SLOT_COND  (1 << 1)
+#define TB_FLAG_DELAY_SLOT_RTE   (1 << 2)
+#define TB_FLAG_PENDING_MOVCA    (1 << 3)
+#define TB_FLAG_GUSA_SHIFT       4                      /* [11:4] */
+#define TB_FLAG_GUSA_EXCLUSIVE   (1 << 12)
+#define TB_FLAG_UNALIGN          (1 << 13)
+#define TB_FLAG_SR_FD            (1 << SR_FD)           /* 15 */
+#define TB_FLAG_FPSCR_PR         FPSCR_PR               /* 19 */
+#define TB_FLAG_FPSCR_SZ         FPSCR_SZ               /* 20 */
+#define TB_FLAG_FPSCR_FR         FPSCR_FR               /* 21 */
+#define TB_FLAG_SR_RB            (1 << SR_RB)           /* 29 */
+#define TB_FLAG_SR_MD            (1 << SR_MD)           /* 30 */
 
-#define TB_FLAG_PENDING_MOVCA  (1 << 3)
-#define TB_FLAG_UNALIGN        (1 << 4)
-
-#define GUSA_SHIFT             4
-#ifdef CONFIG_USER_ONLY
-#define GUSA_EXCLUSIVE         (1 << 12)
-#define GUSA_MASK              ((0xff << GUSA_SHIFT) | GUSA_EXCLUSIVE)
-#else
-/* Provide dummy versions of the above to allow tests against tbflags
-   to be elided while avoiding ifdefs.  */
-#define GUSA_EXCLUSIVE         0
-#define GUSA_MASK              0
-#endif
-
-#define TB_FLAG_ENVFLAGS_MASK  (DELAY_SLOT_MASK | GUSA_MASK)
+#define TB_FLAG_DELAY_SLOT_MASK  (TB_FLAG_DELAY_SLOT |       \
+                                  TB_FLAG_DELAY_SLOT_COND |  \
+                                  TB_FLAG_DELAY_SLOT_RTE)
+#define TB_FLAG_GUSA_MASK        ((0xff << TB_FLAG_GUSA_SHIFT) | \
+                                  TB_FLAG_GUSA_EXCLUSIVE)
+#define TB_FLAG_FPSCR_MASK       (TB_FLAG_FPSCR_PR | \
+                                  TB_FLAG_FPSCR_SZ | \
+                                  TB_FLAG_FPSCR_FR)
+#define TB_FLAG_SR_MASK          (TB_FLAG_SR_FD | \
+                                  TB_FLAG_SR_RB | \
+                                  TB_FLAG_SR_MD)
+#define TB_FLAG_ENVFLAGS_MASK    (TB_FLAG_DELAY_SLOT_MASK | \
+                                  TB_FLAG_GUSA_MASK)
 
 typedef struct tlb_t {
     uint32_t vpn;		/* virtual page number */
@@ -XXX,XX +XXX,XX @@ static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
 {
     /* The instruction in a RTE delay slot is fetched in privileged
        mode, but executed in user mode.  */
-    if (ifetch && (env->flags & DELAY_SLOT_RTE)) {
+    if (ifetch && (env->flags & TB_FLAG_DELAY_SLOT_RTE)) {
         return 0;
     } else {
         return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
@@ -XXX,XX +XXX,XX @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
 {
     *pc = env->pc;
     /* For a gUSA region, notice the end of the region.  */
-    *cs_base = env->flags & GUSA_MASK ? env->gregs[0] : 0;
-    *flags = env->flags /* TB_FLAG_ENVFLAGS_MASK: bits 0-2, 4-12 */
-            | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
-            | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
-            | (env->sr & (1u << SR_FD))                        /* Bit 15 */
+    *cs_base = env->flags & TB_FLAG_GUSA_MASK ? env->gregs[0] : 0;
+    *flags = env->flags
+            | (env->fpscr & TB_FLAG_FPSCR_MASK)
+            | (env->sr & TB_FLAG_SR_MASK)
             | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 3 */
 #ifdef CONFIG_USER_ONLY
     *flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus;
diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c
index XXXXXXX..XXXXXXX 100644
--- a/linux-user/sh4/signal.c
+++ b/linux-user/sh4/signal.c
@@ -XXX,XX +XXX,XX @@ static void restore_sigcontext(CPUSH4State *regs, struct target_sigcontext *sc)
     __get_user(regs->fpul, &sc->sc_fpul);
 
     regs->tra = -1;         /* disable syscall checks */
-    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
+    regs->flags = 0;
 }
 
 void setup_frame(int sig, struct target_sigaction *ka,
@@ -XXX,XX +XXX,XX @@ void setup_frame(int sig, struct target_sigaction *ka,
     regs->gregs[5] = 0;
     regs->gregs[6] = frame_addr += offsetof(typeof(*frame), sc);
     regs->pc = (unsigned long) ka->_sa_handler;
-    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
+    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
 
     unlock_user_struct(frame, frame_addr, 1);
     return;
@@ -XXX,XX +XXX,XX @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
     regs->gregs[5] = frame_addr + offsetof(typeof(*frame), info);
     regs->gregs[6] = frame_addr + offsetof(typeof(*frame), uc);
     regs->pc = (unsigned long) ka->_sa_handler;
-    regs->flags &= ~(DELAY_SLOT_MASK | GUSA_MASK);
+    regs->flags &= ~(TB_FLAG_DELAY_SLOT_MASK | TB_FLAG_GUSA_MASK);
 
     unlock_user_struct(frame, frame_addr, 1);
     return;
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -XXX,XX +XXX,XX @@ static void superh_cpu_synchronize_from_tb(CPUState *cs,
     SuperHCPU *cpu = SUPERH_CPU(cs);
 
     cpu->env.pc = tb_pc(tb);
-    cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK;
+    cpu->env.flags = tb->flags;
 }
 
 #ifndef CONFIG_USER_ONLY
@@ -XXX,XX +XXX,XX @@ static bool superh_io_recompile_replay_branch(CPUState *cs,
     SuperHCPU *cpu = SUPERH_CPU(cs);
     CPUSH4State *env = &cpu->env;
 
-    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
+    if ((env->flags & (TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND))
         && env->pc != tb_pc(tb)) {
         env->pc -= 2;
-        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
+        env->flags &= ~(TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND);
         return true;
     }
     return false;
diff --git a/target/sh4/helper.c b/target/sh4/helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/helper.c
+++ b/target/sh4/helper.c
@@ -XXX,XX +XXX,XX @@ void superh_cpu_do_interrupt(CPUState *cs)
     env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB);
     env->lock_addr = -1;
 
-    if (env->flags & DELAY_SLOT_MASK) {
+    if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
         /* Branch instruction should be executed again before delay slot. */
 	env->spc -= 2;
 	/* Clear flags for exception/interrupt routine. */
-        env->flags &= ~DELAY_SLOT_MASK;
+        env->flags &= ~TB_FLAG_DELAY_SLOT_MASK;
     }
 
     if (do_exp) {
@@ -XXX,XX +XXX,XX @@ bool superh_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
         CPUSH4State *env = &cpu->env;
 
         /* Delay slots are indivisible, ignore interrupts */
-        if (env->flags & DELAY_SLOT_MASK) {
+        if (env->flags & TB_FLAG_DELAY_SLOT_MASK) {
             return false;
         } else {
             superh_cpu_do_interrupt(cs);
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index XXXXXXX..XXXXXXX 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -XXX,XX +XXX,XX @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, int flags)
 		    i, env->gregs[i], i + 1, env->gregs[i + 1],
 		    i + 2, env->gregs[i + 2], i + 3, env->gregs[i + 3]);
     }
-    if (env->flags & DELAY_SLOT) {
+    if (env->flags & TB_FLAG_DELAY_SLOT) {
         qemu_printf("in delay slot (delayed_pc=0x%08x)\n",
 		    env->delayed_pc);
-    } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
+    } else if (env->flags & TB_FLAG_DELAY_SLOT_COND) {
         qemu_printf("in conditional delay slot (delayed_pc=0x%08x)\n",
 		    env->delayed_pc);
-    } else if (env->flags & DELAY_SLOT_RTE) {
+    } else if (env->flags & TB_FLAG_DELAY_SLOT_RTE) {
         qemu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
                      env->delayed_pc);
     }
@@ -XXX,XX +XXX,XX @@ static inline void gen_save_cpu_state(DisasContext *ctx, bool save_pc)
 
 static inline bool use_exit_tb(DisasContext *ctx)
 {
-    return (ctx->tbflags & GUSA_EXCLUSIVE) != 0;
+    return (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) != 0;
 }
 
 static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
@@ -XXX,XX +XXX,XX @@ static void gen_conditional_jump(DisasContext *ctx, target_ulong dest,
     TCGLabel *l1 = gen_new_label();
     TCGCond cond_not_taken = jump_if_true ? TCG_COND_EQ : TCG_COND_NE;
 
-    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
         /* When in an exclusive region, we must continue to the end.
            Therefore, exit the region on a taken branch, but otherwise
            fall through to the next instruction.  */
         tcg_gen_brcondi_i32(cond_not_taken, cpu_sr_t, 0, l1);
-        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
+        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
         /* Note that this won't actually use a goto_tb opcode because we
            disallow it in use_goto_tb, but it handles exit + singlestep.  */
         gen_goto_tb(ctx, 0, dest);
@@ -XXX,XX +XXX,XX @@ static void gen_delayed_conditional_jump(DisasContext * ctx)
     tcg_gen_mov_i32(ds, cpu_delayed_cond);
     tcg_gen_discard_i32(cpu_delayed_cond);
 
-    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
         /* When in an exclusive region, we must continue to the end.
            Therefore, exit the region on a taken branch, but otherwise
            fall through to the next instruction.  */
         tcg_gen_brcondi_i32(TCG_COND_EQ, ds, 0, l1);
 
         /* Leave the gUSA region.  */
-        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~GUSA_MASK);
+        tcg_gen_movi_i32(cpu_flags, ctx->envflags & ~TB_FLAG_GUSA_MASK);
         gen_jump(ctx);
 
         gen_set_label(l1);
@@ -XXX,XX +XXX,XX @@ static inline void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
 
 #define CHECK_NOT_DELAY_SLOT \
-    if (ctx->envflags & DELAY_SLOT_MASK) {  \
-        goto do_illegal_slot;               \
+    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {  \
+        goto do_illegal_slot;                       \
     }
 
 #define CHECK_PRIVILEGED \
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
     case 0x000b:		/* rts */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0028:		/* clrmac */
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_NOT_DELAY_SLOT
         gen_write_sr(cpu_ssr);
 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
-        ctx->envflags |= DELAY_SLOT_RTE;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT_RTE;
 	ctx->delayed_pc = (uint32_t) - 1;
         ctx->base.is_jmp = DISAS_STOP;
 	return;
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
 	return;
     case 0xe000:		/* mov #imm,Rn */
 #ifdef CONFIG_USER_ONLY
-        /* Detect the start of a gUSA region.  If so, update envflags
-           and end the TB.  This will allow us to see the end of the
-           region (stored in R0) in the next TB.  */
+        /*
+         * Detect the start of a gUSA region (mov #-n, r15).
+         * If so, update envflags and end the TB.  This will allow us
+         * to see the end of the region (stored in R0) in the next TB.
+         */
         if (B11_8 == 15 && B7_0s < 0 &&
             (tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
-            ctx->envflags = deposit32(ctx->envflags, GUSA_SHIFT, 8, B7_0s);
+            ctx->envflags =
+                deposit32(ctx->envflags, TB_FLAG_GUSA_SHIFT, 8, B7_0s);
             ctx->base.is_jmp = DISAS_STOP;
         }
 #endif
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
     case 0xa000:		/* bra disp */
 	CHECK_NOT_DELAY_SLOT
         ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	return;
     case 0xb000:		/* bsr disp */
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
         ctx->delayed_pc = ctx->base.pc_next + 4 + B11_0s * 2;
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	return;
     }
 
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_xori_i32(cpu_delayed_cond, cpu_sr_t, 1);
         ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
-        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
 	return;
     case 0x8900:		/* bt label */
 	CHECK_NOT_DELAY_SLOT
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_mov_i32(cpu_delayed_cond, cpu_sr_t);
         ctx->delayed_pc = ctx->base.pc_next + 4 + B7_0s * 2;
-        ctx->envflags |= DELAY_SLOT_CONDITIONAL;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT_COND;
 	return;
     case 0x8800:		/* cmp/eq #imm,R0 */
         tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s);
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
     case 0x0023:		/* braf Rn */
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_addi_i32(cpu_delayed_pc, REG(B11_8), ctx->base.pc_next + 4);
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0003:		/* bsrf Rn */
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
 	tcg_gen_add_i32(cpu_delayed_pc, REG(B11_8), cpu_pr);
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x4015:		/* cmp/pl Rn */
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
     case 0x402b:		/* jmp @Rn */
 	CHECK_NOT_DELAY_SLOT
 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x400b:		/* jsr @Rn */
 	CHECK_NOT_DELAY_SLOT
         tcg_gen_movi_i32(cpu_pr, ctx->base.pc_next + 4);
 	tcg_gen_mov_i32(cpu_delayed_pc, REG(B11_8));
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= TB_FLAG_DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x400e:		/* ldc Rm,SR */
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
     fflush(stderr);
 #endif
  do_illegal:
-    if (ctx->envflags & DELAY_SLOT_MASK) {
+    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
  do_illegal_slot:
         gen_save_cpu_state(ctx, true);
         gen_helper_raise_slot_illegal_instruction(cpu_env);
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
 
  do_fpu_disabled:
     gen_save_cpu_state(ctx, true);
-    if (ctx->envflags & DELAY_SLOT_MASK) {
+    if (ctx->envflags & TB_FLAG_DELAY_SLOT_MASK) {
         gen_helper_raise_slot_fpu_disable(cpu_env);
     } else {
         gen_helper_raise_fpu_disable(cpu_env);
@@ -XXX,XX +XXX,XX @@ static void decode_opc(DisasContext * ctx)
 
     _decode_opc(ctx);
 
-    if (old_flags & DELAY_SLOT_MASK) {
+    if (old_flags & TB_FLAG_DELAY_SLOT_MASK) {
         /* go out of the delay slot */
-        ctx->envflags &= ~DELAY_SLOT_MASK;
+        ctx->envflags &= ~TB_FLAG_DELAY_SLOT_MASK;
 
         /* When in an exclusive region, we must continue to the end
            for conditional branches.  */
-        if (ctx->tbflags & GUSA_EXCLUSIVE
-            && old_flags & DELAY_SLOT_CONDITIONAL) {
+        if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE
+            && old_flags & TB_FLAG_DELAY_SLOT_COND) {
             gen_delayed_conditional_jump(ctx);
             return;
         }
         /* Otherwise this is probably an invalid gUSA region.
            Drop the GUSA bits so the next TB doesn't see them.  */
-        ctx->envflags &= ~GUSA_MASK;
+        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
 
         tcg_gen_movi_i32(cpu_flags, ctx->envflags);
-        if (old_flags & DELAY_SLOT_CONDITIONAL) {
+        if (old_flags & TB_FLAG_DELAY_SLOT_COND) {
 	    gen_delayed_conditional_jump(ctx);
         } else {
             gen_jump(ctx);
@@ -XXX,XX +XXX,XX @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
     }
 
     /* The entire region has been translated.  */
-    ctx->envflags &= ~GUSA_MASK;
+    ctx->envflags &= ~TB_FLAG_GUSA_MASK;
     ctx->base.pc_next = pc_end;
     ctx->base.num_insns += max_insns - 1;
     return;
@@ -XXX,XX +XXX,XX @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env)
 
     /* Restart with the EXCLUSIVE bit set, within a TB run via
        cpu_exec_step_atomic holding the exclusive lock.  */
-    ctx->envflags |= GUSA_EXCLUSIVE;
+    ctx->envflags |= TB_FLAG_GUSA_EXCLUSIVE;
     gen_save_cpu_state(ctx, false);
     gen_helper_exclusive(cpu_env);
     ctx->base.is_jmp = DISAS_NORETURN;
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
                   (tbflags & (1 << SR_RB))) * 0x10;
     ctx->fbank = tbflags & FPSCR_FR ? 0x10 : 0;
 
-    if (tbflags & GUSA_MASK) {
+#ifdef CONFIG_USER_ONLY
+    if (tbflags & TB_FLAG_GUSA_MASK) {
+        /* In gUSA exclusive region. */
         uint32_t pc = ctx->base.pc_next;
         uint32_t pc_end = ctx->base.tb->cs_base;
-        int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
+        int backup = sextract32(ctx->tbflags, TB_FLAG_GUSA_SHIFT, 8);
         int max_insns = (pc_end - pc) / 2;
 
         if (pc != pc_end + backup || max_insns < 2) {
             /* This is a malformed gUSA region.  Don't do anything special,
                since the interpreter is likely to get confused.  */
-            ctx->envflags &= ~GUSA_MASK;
-        } else if (tbflags & GUSA_EXCLUSIVE) {
+            ctx->envflags &= ~TB_FLAG_GUSA_MASK;
+        } else if (tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
             /* Regardless of single-stepping or the end of the page,
                we must complete execution of the gUSA region while
                holding the exclusive lock.  */
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
             return;
         }
     }
+#endif
 
     /* Since the ISA is fixed-width, we can bound by the number
        of instructions remaining on the page.  */
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
 
 #ifdef CONFIG_USER_ONLY
-    if (unlikely(ctx->envflags & GUSA_MASK)
-        && !(ctx->envflags & GUSA_EXCLUSIVE)) {
+    if (unlikely(ctx->envflags & TB_FLAG_GUSA_MASK)
+        && !(ctx->envflags & TB_FLAG_GUSA_EXCLUSIVE)) {
         /* We're in an gUSA region, and we have not already fallen
            back on using an exclusive region.  Attempt to parse the
            region into a single supported atomic operation.  Failure
@@ -XXX,XX +XXX,XX @@ static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
 {
     DisasContext *ctx = container_of(dcbase, DisasContext, base);
 
-    if (ctx->tbflags & GUSA_EXCLUSIVE) {
+    if (ctx->tbflags & TB_FLAG_GUSA_EXCLUSIVE) {
         /* Ending the region of exclusivity.  Clear the bits.  */
-        ctx->envflags &= ~GUSA_MASK;
+        ctx->envflags &= ~TB_FLAG_GUSA_MASK;
     }
 
     switch (ctx->base.is_jmp) {
-- 
2.34.1