The following changes since commit 8844bb8d896595ee1d25d21c770e6e6f29803097:

  Merge tag 'or1k-pull-request-20230513' of https://github.com/stffrdhrn/qemu into staging (2023-05-13 11:23:14 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230516

for you to fetch changes up to ee95d036bf4bfa10be65325a287bf3d0e8b2a0e6:

  tcg: Split out exec/user/guest-base.h (2023-05-16 08:11:53 -0700)

----------------------------------------------------------------
tcg/i386: Fix tcg_out_addi_ptr for win64
tcg: Implement atomicity for TCGv_i128
tcg: First quarter of cleanups for building tcg once

----------------------------------------------------------------
Richard Henderson (80):
      tcg/i386: Set P_REXW in tcg_out_addi_ptr
      include/exec/memop: Add MO_ATOM_*
      accel/tcg: Honor atomicity of loads
      accel/tcg: Honor atomicity of stores
      tcg: Unify helper_{be,le}_{ld,st}*
      accel/tcg: Implement helper_{ld,st}*_mmu for user-only
      tcg/tci: Use helper_{ld,st}*_mmu for user-only
      tcg: Add 128-bit guest memory primitives
      meson: Detect atomic128 support with optimization
      tcg/i386: Add have_atomic16
      tcg/aarch64: Detect have_lse, have_lse2 for linux
      tcg/aarch64: Detect have_lse, have_lse2 for darwin
      tcg/i386: Use full load/store helpers in user-only mode
      tcg/aarch64: Use full load/store helpers in user-only mode
      tcg/ppc: Use full load/store helpers in user-only mode
      tcg/loongarch64: Use full load/store helpers in user-only mode
      tcg/riscv: Use full load/store helpers in user-only mode
      tcg/arm: Adjust constraints on qemu_ld/st
      tcg/arm: Use full load/store helpers in user-only mode
      tcg/mips: Use full load/store helpers in user-only mode
      tcg/s390x: Use full load/store helpers in user-only mode
      tcg/sparc64: Allocate %g2 as a third temporary
      tcg/sparc64: Rename tcg_out_movi_imm13 to tcg_out_movi_s13
      target/sparc64: Remove tcg_out_movi_s13 case from tcg_out_movi_imm32
      tcg/sparc64: Rename tcg_out_movi_imm32 to tcg_out_movi_u32
      tcg/sparc64: Split out tcg_out_movi_s32
      tcg/sparc64: Use standard slow path for softmmu
      accel/tcg: Remove helper_unaligned_{ld,st}
      tcg/loongarch64: Check the host supports unaligned accesses
      tcg/loongarch64: Support softmmu unaligned accesses
      tcg/riscv: Support softmmu unaligned accesses
      tcg: Introduce tcg_target_has_memory_bswap
      tcg: Add INDEX_op_qemu_{ld,st}_i128
      tcg: Introduce tcg_out_movext3
      tcg: Merge tcg_out_helper_load_regs into caller
      tcg: Support TCG_TYPE_I128 in tcg_out_{ld,st}_helper_{args,ret}
      tcg: Introduce atom_and_align_for_opc
      tcg/i386: Use atom_and_align_for_opc
      tcg/aarch64: Use atom_and_align_for_opc
      tcg/arm: Use atom_and_align_for_opc
      tcg/loongarch64: Use atom_and_align_for_opc
      tcg/mips: Use atom_and_align_for_opc
      tcg/ppc: Use atom_and_align_for_opc
      tcg/riscv: Use atom_and_align_for_opc
      tcg/s390x: Use atom_and_align_for_opc
      tcg/sparc64: Use atom_and_align_for_opc
      tcg/i386: Honor 64-bit atomicity in 32-bit mode
      tcg/i386: Support 128-bit load/store with have_atomic16
      tcg/aarch64: Rename temporaries
      tcg/aarch64: Support 128-bit load/store
      tcg/ppc: Support 128-bit load/store
      tcg/s390x: Support 128-bit load/store
      tcg: Split out memory ops to tcg-op-ldst.c
      tcg: Widen gen_insn_data to uint64_t
      accel/tcg: Widen tcg-ldst.h addresses to uint64_t
      tcg: Widen helper_{ld,st}_i128 addresses to uint64_t
      tcg: Widen helper_atomic_* addresses to uint64_t
      tcg: Widen tcg_gen_code pc_start argument to uint64_t
      accel/tcg: Merge gen_mem_wrapped with plugin_gen_empty_mem_callback
      accel/tcg: Merge do_gen_mem_cb into caller
      tcg: Reduce copies for plugin_gen_mem_callbacks
      accel/tcg: Widen plugin_gen_empty_mem_callback to i64
      tcg: Add addr_type to TCGContext
      tcg: Remove TCGv from tcg_gen_qemu_{ld,st}_*
      tcg: Remove TCGv from tcg_gen_atomic_*
      tcg: Split INDEX_op_qemu_{ld,st}* for guest address size
      tcg/tci: Elimnate TARGET_LONG_BITS, target_ulong
      tcg/i386: Always enable TCG_TARGET_HAS_extr[lh]_i64_i32
      tcg/i386: Conditionalize tcg_out_extu_i32_i64
      tcg/i386: Adjust type of tlb_mask
      tcg/i386: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/arm: Remove TARGET_LONG_BITS
      tcg/aarch64: Remove USE_GUEST_BASE
      tcg/aarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/loongarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/mips: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg: Add page_bits and page_mask to TCGContext
      tcg: Add tlb_dyn_max_bits to TCGContext
      tcg: Split out exec/user/guest-base.h

 docs/devel/loads-stores.rst | 36 +-
 docs/devel/tcg-ops.rst | 11 +-
 meson.build | 52 +-
 accel/tcg/tcg-runtime.h | 49 +-
 include/exec/cpu-all.h | 5 +-
 include/exec/memop.h | 37 ++
 include/exec/plugin-gen.h | 4 +-
 include/exec/user/guest-base.h | 12 +
 include/qemu/cpuid.h | 18 +
 include/tcg/tcg-ldst.h | 72 +--
 include/tcg/tcg-op.h | 273 ++++++---
 include/tcg/tcg-opc.h | 41 +-
 include/tcg/tcg.h | 39 +-
 tcg/aarch64/tcg-target-con-set.h | 2 +
 tcg/aarch64/tcg-target.h | 15 +-
 tcg/arm/tcg-target-con-set.h | 16 +-
 tcg/arm/tcg-target-con-str.h | 5 +-
 tcg/arm/tcg-target.h | 3 +-
 tcg/i386/tcg-target.h | 13 +-
 tcg/loongarch64/tcg-target.h | 3 +-
 tcg/mips/tcg-target.h | 4 +-
 tcg/ppc/tcg-target-con-set.h | 2 +
 tcg/ppc/tcg-target-con-str.h | 1 +
 tcg/ppc/tcg-target.h | 4 +-
 tcg/riscv/tcg-target.h | 4 +-
 tcg/s390x/tcg-target-con-set.h | 2 +
 tcg/s390x/tcg-target.h | 4 +-
 tcg/sparc64/tcg-target-con-set.h | 2 -
 tcg/sparc64/tcg-target-con-str.h | 1 -
 tcg/sparc64/tcg-target.h | 4 +-
 tcg/tcg-internal.h | 2 +
 tcg/tci/tcg-target.h | 4 +-
 accel/tcg/cputlb.c | 839 ++++++++++++++++---------
 accel/tcg/plugin-gen.c | 68 +-
 accel/tcg/translate-all.c | 35 +-
 accel/tcg/user-exec.c | 488 ++++++++++-----
 tcg/optimize.c | 19 +-
 tcg/tcg-op-ldst.c | 1234 +++++++++++++++++++++++++++++++++++++
 tcg/tcg-op.c | 864 --------------------------
 tcg/tcg.c | 627 +++++++++++++++----
 tcg/tci.c | 243 +++-----
 accel/tcg/atomic_common.c.inc | 14 +-
 accel/tcg/ldst_atomicity.c.inc | 1262 ++++++++++++++++++++++++++++++++++++++
 tcg/aarch64/tcg-target.c.inc | 438 ++++++++-----
 tcg/arm/tcg-target.c.inc | 246 +++-----
 tcg/i386/tcg-target.c.inc | 467 ++++++++++----
 tcg/loongarch64/tcg-target.c.inc | 123 ++--
 tcg/mips/tcg-target.c.inc | 216 +++----
 tcg/ppc/tcg-target.c.inc | 300 +++++----
 tcg/riscv/tcg-target.c.inc | 161 ++---
 tcg/s390x/tcg-target.c.inc | 207 ++++---
 tcg/sparc64/tcg-target.c.inc | 731 ++++++++--------------
 tcg/tci/tcg-target.c.inc | 58 +-
 tcg/meson.build | 1 +
 54 files changed, 5988 insertions(+), 3393 deletions(-)
 create mode 100644 include/exec/user/guest-base.h
 create mode 100644 tcg/tcg-op-ldst.c
 create mode 100644 accel/tcg/ldst_atomicity.c.inc
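
As background for the atomicity items above ("tcg: Implement atomicity for TCGv_i128" and "meson: Detect atomic128 support with optimization"): whether the host can perform 16-byte single-copy atomic accesses is something that has to be probed at build time. The following is only an illustrative, self-contained sketch of such a probe, not the series' actual meson.build check; it assumes a GCC/Clang host compiler and may need -mcx16 or -latomic to link, depending on the host.

    /* Illustrative probe for 16-byte host atomics; not QEMU's meson check. */
    int main(void)
    {
        unsigned __int128 x = 0, y;

        y = __atomic_load_n(&x, __ATOMIC_RELAXED);
        __atomic_store_n(&x, y, __ATOMIC_RELAXED);
        return (int)y;
    }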
The REXW bit must be set to produce a 64-bit pointer result; the
bit is disabled in 32-bit mode, so we can do this unconditionally.

Fixes: 7d9e1ee424b0 ("tcg/i386: Adjust assert in tcg_out_addi_ptr")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1592
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1642
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
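
To see concretely why the missing REXW bit loses the high half of the pointer, here is a standalone host-side illustration (not QEMU code; it assumes an x86-64 host and GCC-style inline assembly): LEA without a REX.W prefix produces a 32-bit result that is zero-extended into the destination register.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t base = 0x123456789abcull;
        uint64_t r32, r64;

        /* No REX.W: 32-bit address computation, upper half discarded. */
        asm("leal 8(%1), %k0" : "=r"(r32) : "r"(base));
        /* REX.W set: full 64-bit address computation. */
        asm("leaq 8(%1), %0" : "=r"(r64) : "r"(base));

        printf("leal: 0x%" PRIx64 "\n", r32);   /* 0x56789ac4 */
        printf("leaq: 0x%" PRIx64 "\n", r64);   /* 0x123456789ac4 */
        return 0;
    }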

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
 {
     /* This function is only used for passing structs by reference. */
     tcg_debug_assert(imm == (int32_t)imm);
-    tcg_out_modrm_offset(s, OPC_LEA, rd, rs, imm);
+    tcg_out_modrm_offset(s, OPC_LEA | P_REXW, rd, rs, imm);
 }

 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
--
2.34.1

This field may be used to describe the precise atomicity requirements
of the guest, which may then be used to constrain the methods by which
it may be emulated by the host.

For instance, the AArch64 LDP (32-bit) instruction changes semantics
with ARMv8.4 LSE2, from

    MO_64 | MO_ATOM_IFALIGN_PAIR
    (64-bits, single-copy atomic only on 4 byte units,
     nonatomic if not aligned by 4),

to

    MO_64 | MO_ATOM_WITHIN16
    (64-bits, single-copy atomic within a 16 byte block)

The former may be implemented with two 4 byte loads, or a single 8 byte
load if that happens to be efficient on the host.  The latter may not
be implemented with two 4 byte loads and may also require a helper when
misaligned.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/memop.h | 37 +++++++++++++++++++++++++++++++++++++
 tcg/tcg.c | 27 +++++++++++++++++++++------
 2 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/include/exec/memop.h b/include/exec/memop.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/memop.h
+++ b/include/exec/memop.h
@@ -XXX,XX +XXX,XX @@ typedef enum MemOp {
     MO_ALIGN_64 = 6 << MO_ASHIFT,
     MO_ALIGN = MO_AMASK,

+    /*
+     * MO_ATOM_* describes the atomicity requirements of the operation:
+     * MO_ATOM_IFALIGN: the operation must be single-copy atomic if it
+     *    is aligned; if unaligned there is no atomicity.
+     * MO_ATOM_IFALIGN_PAIR: the entire operation may be considered to
+     *    be a pair of half-sized operations which are packed together
+     *    for convenience, with single-copy atomicity on each half if
+     *    the half is aligned.
+     *    This is the atomicity e.g. of Arm pre-FEAT_LSE2 LDP.
+     * MO_ATOM_WITHIN16: the operation is single-copy atomic, even if it
+     *    is unaligned, so long as it does not cross a 16-byte boundary;
+     *    if it crosses a 16-byte boundary there is no atomicity.
+     *    This is the atomicity e.g. of Arm FEAT_LSE2 LDR.
+     * MO_ATOM_WITHIN16_PAIR: the entire operation is single-copy atomic,
+     *    if it happens to be within a 16-byte boundary, otherwise it
+     *    devolves to a pair of half-sized MO_ATOM_WITHIN16 operations.
+     *    Depending on alignment, one or both will be single-copy atomic.
+     *    This is the atomicity e.g. of Arm FEAT_LSE2 LDP.
+     * MO_ATOM_SUBALIGN: the operation is single-copy atomic by parts
+     *    by the alignment.  E.g. if the address is 0 mod 4, then each
+     *    4-byte subobject is single-copy atomic.
+     *    This is the atomicity e.g. of IBM Power.
+     * MO_ATOM_NONE: the operation has no atomicity requirements.
+     *
+     * Note the default (i.e. 0) value is single-copy atomic to the
+     * size of the operation, if aligned.  This retains the behaviour
+     * from before this field was introduced.
+     */
+    MO_ATOM_SHIFT         = 8,
+    MO_ATOM_IFALIGN       = 0 << MO_ATOM_SHIFT,
+    MO_ATOM_IFALIGN_PAIR  = 1 << MO_ATOM_SHIFT,
+    MO_ATOM_WITHIN16      = 2 << MO_ATOM_SHIFT,
+    MO_ATOM_WITHIN16_PAIR = 3 << MO_ATOM_SHIFT,
+    MO_ATOM_SUBALIGN      = 4 << MO_ATOM_SHIFT,
+    MO_ATOM_NONE          = 5 << MO_ATOM_SHIFT,
+    MO_ATOM_MASK          = 7 << MO_ATOM_SHIFT,
+
     /* Combinations of the above, for ease of use. */
     MO_UB = MO_8,
     MO_UW = MO_16,
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
 };

+static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
+    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
+    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
+    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
+    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
+    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
+    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
+};
+
 static const char bswap_flag_name[][6] = {
     [TCG_BSWAP_IZ] = "iz",
     [TCG_BSWAP_OZ] = "oz",
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
         case INDEX_op_qemu_ld_i64:
         case INDEX_op_qemu_st_i64:
             {
+                const char *s_al, *s_op, *s_at;
                 MemOpIdx oi = op->args[k++];
                 MemOp op = get_memop(oi);
                 unsigned ix = get_mmuidx(oi);

-                if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
-                    col += ne_fprintf(f, ",$0x%x,%u", op, ix);
+                s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
+                s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
+                s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
+                op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
+
+                /* If all fields are accounted for, print symbolically. */
+                if (!op && s_al && s_op && s_at) {
+                    col += ne_fprintf(f, ",%s%s%s,%u",
+                                      s_at, s_al, s_op, ix);
                 } else {
-                    const char *s_al, *s_op;
-                    s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
-                    s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
-                    col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
+                    op = get_memop(oi);
+                    col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                 }
                 i = 1;
             }
--
2.34.1

Create ldst_atomicity.c.inc.

Not required for user-only code loads, because we've ensured that
the page is read-only before beginning to translate code.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 175 +++++---
 accel/tcg/user-exec.c | 26 +-
 accel/tcg/ldst_atomicity.c.inc | 566 +++++++++++++++++++++++++++++++++
 3 files changed, 716 insertions(+), 51 deletions(-)
 create mode 100644 accel/tcg/ldst_atomicity.c.inc
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
131
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
132
--- a/accel/tcg/cputlb.c
17
--- a/accel/tcg/cputlb.c
133
+++ b/accel/tcg/cputlb.c
18
+++ b/accel/tcg/cputlb.c
134
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
19
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
135
return qemu_ram_addr_from_host_nofail(p);
20
return qemu_ram_addr_from_host_nofail(p);
136
}
21
}
137
22
138
-tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
23
+/* Load/store with atomicity primitives. */
24
+#include "ldst_atomicity.c.inc"
25
+
26
#ifdef CONFIG_PLUGIN
27
/*
28
* Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
29
@@ -XXX,XX +XXX,XX @@ static void validate_memop(MemOpIdx oi, MemOp expected)
30
* specifically for reading instructions from system memory. It is
31
* called by the translation loop and in some helpers where the code
32
* is disassembled. It shouldn't be called directly by guest code.
33
- */
34
-
35
-typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
36
- MemOpIdx oi, uintptr_t retaddr);
37
-
38
-static inline uint64_t QEMU_ALWAYS_INLINE
39
-load_memop(const void *haddr, MemOp op)
139
-{
40
-{
140
- return get_page_addr_code_hostp(env, addr, NULL);
41
- switch (op) {
42
- case MO_UB:
43
- return ldub_p(haddr);
44
- case MO_BEUW:
45
- return lduw_be_p(haddr);
46
- case MO_LEUW:
47
- return lduw_le_p(haddr);
48
- case MO_BEUL:
49
- return (uint32_t)ldl_be_p(haddr);
50
- case MO_LEUL:
51
- return (uint32_t)ldl_le_p(haddr);
52
- case MO_BEUQ:
53
- return ldq_be_p(haddr);
54
- case MO_LEUQ:
55
- return ldq_le_p(haddr);
56
- default:
57
- qemu_build_not_reached();
58
- }
141
-}
59
-}
142
-
60
-
143
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
61
-/*
144
CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
62
+ *
63
* For the benefit of TCG generated code, we want to avoid the
64
* complication of ABI-specific return type promotion and always
65
* return a value extended to the register size of the host. This is
66
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
67
return ret_be;
68
}
69
70
+/**
71
+ * do_ld_parts_beN
72
+ * @p: translation parameters
73
+ * @ret_be: accumulated data
74
+ *
75
+ * As do_ld_bytes_beN, but atomically on each aligned part.
76
+ */
77
+static uint64_t do_ld_parts_beN(MMULookupPageData *p, uint64_t ret_be)
78
+{
79
+ void *haddr = p->haddr;
80
+ int size = p->size;
81
+
82
+ do {
83
+ uint64_t x;
84
+ int n;
85
+
86
+ /*
87
+ * Find minimum of alignment and size.
88
+ * This is slightly stronger than required by MO_ATOM_SUBALIGN, which
89
+ * would have only checked the low bits of addr|size once at the start,
90
+ * but is just as easy.
91
+ */
92
+ switch (((uintptr_t)haddr | size) & 7) {
93
+ case 4:
94
+ x = cpu_to_be32(load_atomic4(haddr));
95
+ ret_be = (ret_be << 32) | x;
96
+ n = 4;
97
+ break;
98
+ case 2:
99
+ case 6:
100
+ x = cpu_to_be16(load_atomic2(haddr));
101
+ ret_be = (ret_be << 16) | x;
102
+ n = 2;
103
+ break;
104
+ default:
105
+ x = *(uint8_t *)haddr;
106
+ ret_be = (ret_be << 8) | x;
107
+ n = 1;
108
+ break;
109
+ case 0:
110
+ g_assert_not_reached();
111
+ }
112
+ haddr += n;
113
+ size -= n;
114
+ } while (size != 0);
115
+ return ret_be;
116
+}
117
+
118
+/**
119
+ * do_ld_parts_be4
120
+ * @p: translation parameters
121
+ * @ret_be: accumulated data
122
+ *
123
+ * As do_ld_bytes_beN, but with one atomic load.
124
+ * Four aligned bytes are guaranteed to cover the load.
125
+ */
126
+static uint64_t do_ld_whole_be4(MMULookupPageData *p, uint64_t ret_be)
127
+{
128
+ int o = p->addr & 3;
129
+ uint32_t x = load_atomic4(p->haddr - o);
130
+
131
+ x = cpu_to_be32(x);
132
+ x <<= o * 8;
133
+ x >>= (4 - p->size) * 8;
134
+ return (ret_be << (p->size * 8)) | x;
135
+}
136
+
137
+/**
138
+ * do_ld_parts_be8
139
+ * @p: translation parameters
140
+ * @ret_be: accumulated data
141
+ *
142
+ * As do_ld_bytes_beN, but with one atomic load.
143
+ * Eight aligned bytes are guaranteed to cover the load.
144
+ */
145
+static uint64_t do_ld_whole_be8(CPUArchState *env, uintptr_t ra,
146
+ MMULookupPageData *p, uint64_t ret_be)
147
+{
148
+ int o = p->addr & 7;
149
+ uint64_t x = load_atomic8_or_exit(env, ra, p->haddr - o);
150
+
151
+ x = cpu_to_be64(x);
152
+ x <<= o * 8;
153
+ x >>= (8 - p->size) * 8;
154
+ return (ret_be << (p->size * 8)) | x;
155
+}
156
+
157
/*
158
* Wrapper for the above.
159
*/
160
static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
161
- uint64_t ret_be, int mmu_idx,
162
- MMUAccessType type, uintptr_t ra)
163
+ uint64_t ret_be, int mmu_idx, MMUAccessType type,
164
+ MemOp mop, uintptr_t ra)
145
{
165
{
166
+ MemOp atom;
167
+ unsigned tmp, half_size;
168
+
169
if (unlikely(p->flags & TLB_MMIO)) {
170
return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra);
171
- } else {
172
+ }
173
+
174
+ /*
175
+ * It is a given that we cross a page and therefore there is no
176
+ * atomicity for the load as a whole, but subobjects may need attention.
177
+ */
178
+ atom = mop & MO_ATOM_MASK;
179
+ switch (atom) {
180
+ case MO_ATOM_SUBALIGN:
181
+ return do_ld_parts_beN(p, ret_be);
182
+
183
+ case MO_ATOM_IFALIGN_PAIR:
184
+ case MO_ATOM_WITHIN16_PAIR:
185
+ tmp = mop & MO_SIZE;
186
+ tmp = tmp ? tmp - 1 : 0;
187
+ half_size = 1 << tmp;
188
+ if (atom == MO_ATOM_IFALIGN_PAIR
189
+ ? p->size == half_size
190
+ : p->size >= half_size) {
191
+ if (!HAVE_al8_fast && p->size < 4) {
192
+ return do_ld_whole_be4(p, ret_be);
193
+ } else {
194
+ return do_ld_whole_be8(env, ra, p, ret_be);
195
+ }
196
+ }
197
+ /* fall through */
198
+
199
+ case MO_ATOM_IFALIGN:
200
+ case MO_ATOM_WITHIN16:
201
+ case MO_ATOM_NONE:
202
return do_ld_bytes_beN(p, ret_be);
203
+
204
+ default:
205
+ g_assert_not_reached();
206
}
207
}
208
209
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
210
}
211
212
/* Perform the load host endian, then swap if necessary. */
213
- ret = load_memop(p->haddr, MO_UW);
214
+ ret = load_atom_2(env, ra, p->haddr, memop);
215
if (memop & MO_BSWAP) {
216
ret = bswap16(ret);
217
}
218
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
219
}
220
221
/* Perform the load host endian. */
222
- ret = load_memop(p->haddr, MO_UL);
223
+ ret = load_atom_4(env, ra, p->haddr, memop);
224
if (memop & MO_BSWAP) {
225
ret = bswap32(ret);
226
}
227
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
228
}
229
230
/* Perform the load host endian. */
231
- ret = load_memop(p->haddr, MO_UQ);
232
+ ret = load_atom_8(env, ra, p->haddr, memop);
233
if (memop & MO_BSWAP) {
234
ret = bswap64(ret);
235
}
236
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
237
return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
238
}
239
240
- ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
241
- ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
242
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
243
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
244
if ((l.memop & MO_BSWAP) == MO_LE) {
245
ret = bswap32(ret);
246
}
247
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
248
return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
249
}
250
251
- ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
252
- ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
253
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
254
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
255
if ((l.memop & MO_BSWAP) == MO_LE) {
256
ret = bswap64(ret);
257
}
146
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
258
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
147
index XXXXXXX..XXXXXXX 100644
259
index XXXXXXX..XXXXXXX 100644
148
--- a/accel/tcg/user-exec.c
260
--- a/accel/tcg/user-exec.c
149
+++ b/accel/tcg/user-exec.c
261
+++ b/accel/tcg/user-exec.c
150
@@ -XXX,XX +XXX,XX @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
262
@@ -XXX,XX +XXX,XX @@ static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
151
return size ? g2h(env_cpu(env), addr) : NULL;
263
return ret;
152
}
264
}
153
265
154
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
266
+#include "ldst_atomicity.c.inc"
155
+ void **hostp)
267
+
156
+{
268
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
157
+ int flags;
269
MemOpIdx oi, uintptr_t ra)
158
+
270
{
159
+ flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
271
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
160
+ g_assert(flags == 0);
272
161
+
273
validate_memop(oi, MO_BEUW);
162
+ if (hostp) {
274
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
163
+ *hostp = g2h_untagged(addr);
275
- ret = lduw_be_p(haddr);
164
+ }
276
+ ret = load_atom_2(env, ra, haddr, get_memop(oi));
165
+ return addr;
277
clear_helper_retaddr();
166
+}
278
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
167
+
279
- return ret;
168
/* The softmmu versions of these helpers are in cputlb.c. */
280
+ return cpu_to_be16(ret);
169
281
}
170
/*
282
283
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
284
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
285
286
validate_memop(oi, MO_BEUL);
287
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
288
- ret = ldl_be_p(haddr);
289
+ ret = load_atom_4(env, ra, haddr, get_memop(oi));
290
clear_helper_retaddr();
291
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
292
- return ret;
293
+ return cpu_to_be32(ret);
294
}
295
296
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
297
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
298
299
validate_memop(oi, MO_BEUQ);
300
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
301
- ret = ldq_be_p(haddr);
302
+ ret = load_atom_8(env, ra, haddr, get_memop(oi));
303
clear_helper_retaddr();
304
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
305
- return ret;
306
+ return cpu_to_be64(ret);
307
}
308
309
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
310
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
311
312
validate_memop(oi, MO_LEUW);
313
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
314
- ret = lduw_le_p(haddr);
315
+ ret = load_atom_2(env, ra, haddr, get_memop(oi));
316
clear_helper_retaddr();
317
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
318
- return ret;
319
+ return cpu_to_le16(ret);
320
}
321
322
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
323
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
324
325
validate_memop(oi, MO_LEUL);
326
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
327
- ret = ldl_le_p(haddr);
328
+ ret = load_atom_4(env, ra, haddr, get_memop(oi));
329
clear_helper_retaddr();
330
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
331
- return ret;
332
+ return cpu_to_le32(ret);
333
}
334
335
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
336
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
337
338
validate_memop(oi, MO_LEUQ);
339
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
340
- ret = ldq_le_p(haddr);
341
+ ret = load_atom_8(env, ra, haddr, get_memop(oi));
342
clear_helper_retaddr();
343
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
344
- return ret;
345
+ return cpu_to_le64(ret);
346
}
347
348
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
349
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
350
new file mode 100644
351
index XXXXXXX..XXXXXXX
352
--- /dev/null
353
+++ b/accel/tcg/ldst_atomicity.c.inc
354
@@ -XXX,XX +XXX,XX @@
355
+/*
356
+ * Routines common to user and system emulation of load/store.
357
+ *
358
+ * Copyright (c) 2022 Linaro, Ltd.
359
+ *
360
+ * SPDX-License-Identifier: GPL-2.0-or-later
361
+ *
362
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
363
+ * See the COPYING file in the top-level directory.
364
+ */
365
+
366
+#ifdef CONFIG_ATOMIC64
367
+# define HAVE_al8 true
368
+#else
369
+# define HAVE_al8 false
370
+#endif
371
+#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
372
+
373
+#if defined(CONFIG_ATOMIC128)
374
+# define HAVE_al16_fast true
375
+#else
376
+# define HAVE_al16_fast false
377
+#endif
378
+
379
+/**
380
+ * required_atomicity:
381
+ *
382
+ * Return the lg2 bytes of atomicity required by @memop for @p.
383
+ * If the operation must be split into two operations to be
384
+ * examined separately for atomicity, return -lg2.
385
+ */
386
+static int required_atomicity(CPUArchState *env, uintptr_t p, MemOp memop)
387
+{
388
+ MemOp atom = memop & MO_ATOM_MASK;
389
+ MemOp size = memop & MO_SIZE;
390
+ MemOp half = size ? size - 1 : 0;
391
+ unsigned tmp;
392
+ int atmax;
393
+
394
+ switch (atom) {
395
+ case MO_ATOM_NONE:
396
+ atmax = MO_8;
397
+ break;
398
+
399
+ case MO_ATOM_IFALIGN_PAIR:
400
+ size = half;
401
+ /* fall through */
402
+
403
+ case MO_ATOM_IFALIGN:
404
+ tmp = (1 << size) - 1;
405
+ atmax = p & tmp ? MO_8 : size;
406
+ break;
407
+
408
+ case MO_ATOM_WITHIN16:
409
+ tmp = p & 15;
410
+ atmax = (tmp + (1 << size) <= 16 ? size : MO_8);
411
+ break;
412
+
413
+ case MO_ATOM_WITHIN16_PAIR:
414
+ tmp = p & 15;
415
+ if (tmp + (1 << size) <= 16) {
416
+ atmax = size;
417
+ } else if (tmp + (1 << half) == 16) {
418
+ /*
419
+ * The pair exactly straddles the boundary.
420
+ * Both halves are naturally aligned and atomic.
421
+ */
422
+ atmax = half;
423
+ } else {
424
+ /*
425
+ * One of the pair crosses the boundary, and is non-atomic.
426
+ * The other of the pair does not cross, and is atomic.
427
+ */
428
+ atmax = -half;
429
+ }
430
+ break;
431
+
432
+ case MO_ATOM_SUBALIGN:
433
+ /*
434
+ * Examine the alignment of p to determine if there are subobjects
435
+ * that must be aligned. Note that we only really need ctz4() --
436
+ * any more sigificant bits are discarded by the immediately
437
+ * following comparison.
438
+ */
439
+ tmp = ctz32(p);
440
+ atmax = MIN(size, tmp);
441
+ break;
442
+
443
+ default:
444
+ g_assert_not_reached();
445
+ }
446
+
447
+ /*
448
+ * Here we have the architectural atomicity of the operation.
449
+ * However, when executing in a serial context, we need no extra
450
+ * host atomicity in order to avoid racing. This reduction
451
+ * avoids looping with cpu_loop_exit_atomic.
452
+ */
453
+ if (cpu_in_serial_context(env_cpu(env))) {
454
+ return MO_8;
455
+ }
456
+ return atmax;
457
+}
458
+
459
+/**
460
+ * load_atomic2:
461
+ * @pv: host address
462
+ *
463
+ * Atomically load 2 aligned bytes from @pv.
464
+ */
465
+static inline uint16_t load_atomic2(void *pv)
466
+{
467
+ uint16_t *p = __builtin_assume_aligned(pv, 2);
468
+ return qatomic_read(p);
469
+}
470
+
471
+/**
472
+ * load_atomic4:
473
+ * @pv: host address
474
+ *
475
+ * Atomically load 4 aligned bytes from @pv.
476
+ */
477
+static inline uint32_t load_atomic4(void *pv)
478
+{
479
+ uint32_t *p = __builtin_assume_aligned(pv, 4);
480
+ return qatomic_read(p);
481
+}
482
+
483
+/**
484
+ * load_atomic8:
485
+ * @pv: host address
486
+ *
487
+ * Atomically load 8 aligned bytes from @pv.
488
+ */
489
+static inline uint64_t load_atomic8(void *pv)
490
+{
491
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
492
+
493
+ qemu_build_assert(HAVE_al8);
494
+ return qatomic_read__nocheck(p);
495
+}
496
+
497
+/**
498
+ * load_atomic16:
499
+ * @pv: host address
500
+ *
501
+ * Atomically load 16 aligned bytes from @pv.
502
+ */
503
+static inline Int128 load_atomic16(void *pv)
504
+{
505
+#ifdef CONFIG_ATOMIC128
506
+ __uint128_t *p = __builtin_assume_aligned(pv, 16);
507
+ Int128Alias r;
508
+
509
+ r.u = qatomic_read__nocheck(p);
510
+ return r.s;
511
+#else
512
+ qemu_build_not_reached();
513
+#endif
514
+}
515
+
516
+/**
517
+ * load_atomic8_or_exit:
518
+ * @env: cpu context
519
+ * @ra: host unwind address
520
+ * @pv: host address
521
+ *
522
+ * Atomically load 8 aligned bytes from @pv.
523
+ * If this is not possible, longjmp out to restart serially.
524
+ */
525
+static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
526
+{
527
+ if (HAVE_al8) {
528
+ return load_atomic8(pv);
529
+ }
530
+
531
+#ifdef CONFIG_USER_ONLY
532
+ /*
533
+ * If the page is not writable, then assume the value is immutable
534
+ * and requires no locking. This ignores the case of MAP_SHARED with
535
+ * another process, because the fallback start_exclusive solution
536
+ * provides no protection across processes.
537
+ */
538
+ if (!page_check_range(h2g(pv), 8, PAGE_WRITE)) {
539
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
540
+ return *p;
541
+ }
542
+#endif
543
+
544
+ /* Ultimate fallback: re-execute in serial context. */
545
+ cpu_loop_exit_atomic(env_cpu(env), ra);
546
+}
547
+
548
+/**
549
+ * load_atomic16_or_exit:
550
+ * @env: cpu context
551
+ * @ra: host unwind address
552
+ * @pv: host address
553
+ *
554
+ * Atomically load 16 aligned bytes from @pv.
555
+ * If this is not possible, longjmp out to restart serially.
556
+ */
557
+static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
558
+{
559
+ Int128 *p = __builtin_assume_aligned(pv, 16);
560
+
561
+ if (HAVE_al16_fast) {
562
+ return load_atomic16(p);
563
+ }
564
+
565
+#ifdef CONFIG_USER_ONLY
566
+ /*
567
+ * We can only use cmpxchg to emulate a load if the page is writable.
568
+ * If the page is not writable, then assume the value is immutable
569
+ * and requires no locking. This ignores the case of MAP_SHARED with
570
+ * another process, because the fallback start_exclusive solution
571
+ * provides no protection across processes.
572
+ */
573
+ if (!page_check_range(h2g(p), 16, PAGE_WRITE)) {
574
+ return *p;
575
+ }
576
+#endif
577
+
578
+ /*
579
+ * In system mode all guest pages are writable, and for user-only
580
+ * we have just checked writability. Try cmpxchg.
581
+ */
582
+#if defined(CONFIG_CMPXCHG128)
583
+ /* Swap 0 with 0, with the side-effect of returning the old value. */
584
+ {
585
+ Int128Alias r;
586
+ r.u = __sync_val_compare_and_swap_16((__uint128_t *)p, 0, 0);
587
+ return r.s;
588
+ }
589
+#endif
590
+
591
+ /* Ultimate fallback: re-execute in serial context. */
592
+ cpu_loop_exit_atomic(env_cpu(env), ra);
593
+}
594
+
595
+/**
596
+ * load_atom_extract_al4x2:
597
+ * @pv: host address
598
+ *
599
+ * Load 4 bytes from @p, from two sequential atomic 4-byte loads.
600
+ */
601
+static uint32_t load_atom_extract_al4x2(void *pv)
602
+{
603
+ uintptr_t pi = (uintptr_t)pv;
604
+ int sh = (pi & 3) * 8;
605
+ uint32_t a, b;
606
+
607
+ pv = (void *)(pi & ~3);
608
+ a = load_atomic4(pv);
609
+ b = load_atomic4(pv + 4);
610
+
611
+ if (HOST_BIG_ENDIAN) {
612
+ return (a << sh) | (b >> (-sh & 31));
613
+ } else {
614
+ return (a >> sh) | (b << (-sh & 31));
615
+ }
616
+}
617
+
618
+/**
619
+ * load_atom_extract_al8x2:
620
+ * @pv: host address
621
+ *
622
+ * Load 8 bytes from @p, from two sequential atomic 8-byte loads.
623
+ */
624
+static uint64_t load_atom_extract_al8x2(void *pv)
625
+{
626
+ uintptr_t pi = (uintptr_t)pv;
627
+ int sh = (pi & 7) * 8;
628
+ uint64_t a, b;
629
+
630
+ pv = (void *)(pi & ~7);
631
+ a = load_atomic8(pv);
632
+ b = load_atomic8(pv + 8);
633
+
634
+ if (HOST_BIG_ENDIAN) {
635
+ return (a << sh) | (b >> (-sh & 63));
636
+ } else {
637
+ return (a >> sh) | (b << (-sh & 63));
638
+ }
639
+}
640
+
641
+/**
642
+ * load_atom_extract_al8_or_exit:
643
+ * @env: cpu context
644
+ * @ra: host unwind address
645
+ * @pv: host address
646
+ * @s: object size in bytes, @s <= 4.
647
+ *
648
+ * Atomically load @s bytes from @p, when p % s != 0, and [p, p+s-1] does
649
+ * not cross an 8-byte boundary. This means that we can perform an atomic
650
+ * 8-byte load and extract.
651
+ * The value is returned in the low bits of a uint32_t.
652
+ */
653
+static uint32_t load_atom_extract_al8_or_exit(CPUArchState *env, uintptr_t ra,
654
+ void *pv, int s)
655
+{
656
+ uintptr_t pi = (uintptr_t)pv;
657
+ int o = pi & 7;
658
+ int shr = (HOST_BIG_ENDIAN ? 8 - s - o : o) * 8;
659
+
660
+ pv = (void *)(pi & ~7);
661
+ return load_atomic8_or_exit(env, ra, pv) >> shr;
662
+}
663
+
664
+/**
665
+ * load_atom_extract_al16_or_exit:
666
+ * @env: cpu context
667
+ * @ra: host unwind address
668
+ * @p: host address
669
+ * @s: object size in bytes, @s <= 8.
670
+ *
671
+ * Atomically load @s bytes from @p, when p % 16 < 8
672
+ * and p % 16 + s > 8. I.e. does not cross a 16-byte
673
+ * boundary, but *does* cross an 8-byte boundary.
674
+ * This is the slow version, so we must have eliminated
675
+ * any faster load_atom_extract_al8_or_exit case.
676
+ *
677
+ * If this is not possible, longjmp out to restart serially.
678
+ */
679
+static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
680
+ void *pv, int s)
681
+{
682
+ uintptr_t pi = (uintptr_t)pv;
683
+ int o = pi & 7;
684
+ int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
685
+ Int128 r;
686
+
687
+ /*
688
+ * Note constraints above: p & 8 must be clear.
689
+ * Provoke SIGBUS if possible otherwise.
690
+ */
691
+ pv = (void *)(pi & ~7);
692
+ r = load_atomic16_or_exit(env, ra, pv);
693
+
694
+ r = int128_urshift(r, shr);
695
+ return int128_getlo(r);
696
+}
697
+
698
+/**
699
+ * load_atom_extract_al16_or_al8:
700
+ * @p: host address
701
+ * @s: object size in bytes, @s <= 8.
702
+ *
703
+ * Load @s bytes from @p, when p % s != 0. If [p, p+s-1] does not
704
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
705
+ * otherwise the access must be 8-byte atomic.
706
+ */
707
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
708
+{
709
+#if defined(CONFIG_ATOMIC128)
710
+ uintptr_t pi = (uintptr_t)pv;
711
+ int o = pi & 7;
712
+ int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
713
+ __uint128_t r;
714
+
715
+ pv = (void *)(pi & ~7);
716
+ if (pi & 8) {
717
+ uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
718
+ uint64_t a = qatomic_read__nocheck(p8);
719
+ uint64_t b = qatomic_read__nocheck(p8 + 1);
720
+
721
+ if (HOST_BIG_ENDIAN) {
722
+ r = ((__uint128_t)a << 64) | b;
723
+ } else {
724
+ r = ((__uint128_t)b << 64) | a;
725
+ }
726
+ } else {
727
+ __uint128_t *p16 = __builtin_assume_aligned(pv, 16, 0);
728
+ r = qatomic_read__nocheck(p16);
729
+ }
730
+ return r >> shr;
731
+#else
732
+ qemu_build_not_reached();
733
+#endif
734
+}
735
+
736
+/**
737
+ * load_atom_4_by_2:
738
+ * @pv: host address
739
+ *
740
+ * Load 4 bytes from @pv, with two 2-byte atomic loads.
741
+ */
742
+static inline uint32_t load_atom_4_by_2(void *pv)
743
+{
744
+ uint32_t a = load_atomic2(pv);
745
+ uint32_t b = load_atomic2(pv + 2);
746
+
747
+ if (HOST_BIG_ENDIAN) {
748
+ return (a << 16) | b;
749
+ } else {
750
+ return (b << 16) | a;
751
+ }
752
+}
753
+
754
+/**
755
+ * load_atom_8_by_2:
756
+ * @pv: host address
757
+ *
758
+ * Load 8 bytes from @pv, with four 2-byte atomic loads.
759
+ */
760
+static inline uint64_t load_atom_8_by_2(void *pv)
761
+{
762
+ uint32_t a = load_atom_4_by_2(pv);
763
+ uint32_t b = load_atom_4_by_2(pv + 4);
764
+
765
+ if (HOST_BIG_ENDIAN) {
766
+ return ((uint64_t)a << 32) | b;
767
+ } else {
768
+ return ((uint64_t)b << 32) | a;
769
+ }
770
+}
771
+
772
+/**
773
+ * load_atom_8_by_4:
774
+ * @pv: host address
775
+ *
776
+ * Load 8 bytes from @pv, with two 4-byte atomic loads.
777
+ */
778
+static inline uint64_t load_atom_8_by_4(void *pv)
779
+{
780
+ uint32_t a = load_atomic4(pv);
781
+ uint32_t b = load_atomic4(pv + 4);
782
+
783
+ if (HOST_BIG_ENDIAN) {
784
+ return ((uint64_t)a << 32) | b;
785
+ } else {
786
+ return ((uint64_t)b << 32) | a;
787
+ }
788
+}
789
+
790
+/**
791
+ * load_atom_2:
792
+ * @p: host address
793
+ * @memop: the full memory op
794
+ *
795
+ * Load 2 bytes from @p, honoring the atomicity of @memop.
796
+ */
797
+static uint16_t load_atom_2(CPUArchState *env, uintptr_t ra,
798
+ void *pv, MemOp memop)
799
+{
800
+ uintptr_t pi = (uintptr_t)pv;
801
+ int atmax;
802
+
803
+ if (likely((pi & 1) == 0)) {
804
+ return load_atomic2(pv);
805
+ }
806
+ if (HAVE_al16_fast) {
807
+ return load_atom_extract_al16_or_al8(pv, 2);
808
+ }
809
+
810
+ atmax = required_atomicity(env, pi, memop);
811
+ switch (atmax) {
812
+ case MO_8:
813
+ return lduw_he_p(pv);
814
+ case MO_16:
815
+ /* The only case remaining is MO_ATOM_WITHIN16. */
816
+ if (!HAVE_al8_fast && (pi & 3) == 1) {
817
+ /* Big or little endian, we want the middle two bytes. */
818
+ return load_atomic4(pv - 1) >> 8;
819
+ }
820
+ if ((pi & 15) != 7) {
821
+ return load_atom_extract_al8_or_exit(env, ra, pv, 2);
822
+ }
823
+ return load_atom_extract_al16_or_exit(env, ra, pv, 2);
824
+ default:
825
+ g_assert_not_reached();
826
+ }
827
+}
828
+
829
+/**
830
+ * load_atom_4:
831
+ * @p: host address
832
+ * @memop: the full memory op
833
+ *
834
+ * Load 4 bytes from @p, honoring the atomicity of @memop.
835
+ */
836
+static uint32_t load_atom_4(CPUArchState *env, uintptr_t ra,
837
+ void *pv, MemOp memop)
838
+{
839
+ uintptr_t pi = (uintptr_t)pv;
840
+ int atmax;
841
+
842
+ if (likely((pi & 3) == 0)) {
843
+ return load_atomic4(pv);
844
+ }
845
+ if (HAVE_al16_fast) {
846
+ return load_atom_extract_al16_or_al8(pv, 4);
847
+ }
848
+
849
+ atmax = required_atomicity(env, pi, memop);
850
+ switch (atmax) {
851
+ case MO_8:
852
+ case MO_16:
853
+ case -MO_16:
854
+ /*
855
+ * For MO_ATOM_IFALIGN, this is more atomicity than required,
856
+ * but it's trivially supported on all hosts, better than 4
857
+ * individual byte loads (when the host requires alignment),
858
+ * and overlaps with the MO_ATOM_SUBALIGN case of p % 2 == 0.
859
+ */
860
+ return load_atom_extract_al4x2(pv);
861
+ case MO_32:
862
+ if (!(pi & 4)) {
863
+ return load_atom_extract_al8_or_exit(env, ra, pv, 4);
864
+ }
865
+ return load_atom_extract_al16_or_exit(env, ra, pv, 4);
866
+ default:
867
+ g_assert_not_reached();
868
+ }
869
+}
870
+
871
+/**
872
+ * load_atom_8:
873
+ * @p: host address
874
+ * @memop: the full memory op
875
+ *
876
+ * Load 8 bytes from @p, honoring the atomicity of @memop.
877
+ */
878
+static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
879
+ void *pv, MemOp memop)
880
+{
881
+ uintptr_t pi = (uintptr_t)pv;
882
+ int atmax;
883
+
884
+ /*
885
+ * If the host does not support 8-byte atomics, wait until we have
886
+ * examined the atomicity parameters below.
887
+ */
888
+ if (HAVE_al8 && likely((pi & 7) == 0)) {
889
+ return load_atomic8(pv);
890
+ }
891
+ if (HAVE_al16_fast) {
892
+ return load_atom_extract_al16_or_al8(pv, 8);
893
+ }
894
+
895
+ atmax = required_atomicity(env, pi, memop);
896
+ if (atmax == MO_64) {
897
+ if (!HAVE_al8 && (pi & 7) == 0) {
898
+ load_atomic8_or_exit(env, ra, pv);
899
+ }
900
+ return load_atom_extract_al16_or_exit(env, ra, pv, 8);
901
+ }
902
+ if (HAVE_al8_fast) {
903
+ return load_atom_extract_al8x2(pv);
904
+ }
905
+ switch (atmax) {
906
+ case MO_8:
907
+ return ldq_he_p(pv);
908
+ case MO_16:
909
+ return load_atom_8_by_2(pv);
910
+ case MO_32:
911
+ return load_atom_8_by_4(pv);
912
+ case -MO_32:
913
+ if (HAVE_al8) {
914
+ return load_atom_extract_al8x2(pv);
915
+ }
916
+ cpu_loop_exit_atomic(env_cpu(env), ra);
917
+ default:
918
+ g_assert_not_reached();
919
+ }
920
+}
171
--
921
--
172
2.34.1
922
2.34.1
diff view generated by jsdifflib
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 108 ++++----
 accel/tcg/user-exec.c | 12 +-
 accel/tcg/ldst_atomicity.c.inc | 491 +++++++++++++++++++++++++++++++++
 3 files changed, 545 insertions(+), 66 deletions(-)
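
The cputlb.c hunk below calls store_whole_le4()/store_whole_le8(), which are added later in ldst_atomicity.c.inc. The underlying idea is a masked compare-and-swap on the containing aligned word; the sketch below is illustrative only (made-up names, little-endian host assumed, and none of the endian handling or "leftover bytes" return value of the real helpers).

    #include <assert.h>
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Store the low @size bytes of @val_le at @addr without breaking the
       single-copy atomicity of the containing aligned 64-bit word. */
    static void store_part_atomic8(void *addr, int size, uint64_t val_le)
    {
        uintptr_t a = (uintptr_t)addr;
        uint64_t *word = (uint64_t *)(a & ~(uintptr_t)7);
        int shift = (a & 7) * 8;
        uint64_t mask, old, nval;

        /* The stored bytes must not spill out of the aligned word. */
        assert((a & 7) + size <= 8);
        mask = (size == 8 ? ~(uint64_t)0
                          : ((uint64_t)1 << (size * 8)) - 1) << shift;

        old = __atomic_load_n(word, __ATOMIC_RELAXED);
        do {
            nval = (old & ~mask) | ((val_le << shift) & mask);
        } while (!__atomic_compare_exchange_n(word, &old, nval, false,
                                              __ATOMIC_RELAXED, __ATOMIC_RELAXED));
    }

    int main(void)
    {
        uint64_t buf[2] = { 0 };

        store_part_atomic8((uint8_t *)buf + 3, 2, 0xbbaa);
        printf("%016" PRIx64 "\n", buf[0]);   /* 000000bbaa000000 */
        return 0;
    }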
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/accel/tcg/cputlb.c
12
+++ b/accel/tcg/cputlb.c
13
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
14
* Store Helpers
15
*/
16
17
-static inline void QEMU_ALWAYS_INLINE
18
-store_memop(void *haddr, uint64_t val, MemOp op)
19
-{
20
- switch (op) {
21
- case MO_UB:
22
- stb_p(haddr, val);
23
- break;
24
- case MO_BEUW:
25
- stw_be_p(haddr, val);
26
- break;
27
- case MO_LEUW:
28
- stw_le_p(haddr, val);
29
- break;
30
- case MO_BEUL:
31
- stl_be_p(haddr, val);
32
- break;
33
- case MO_LEUL:
34
- stl_le_p(haddr, val);
35
- break;
36
- case MO_BEUQ:
37
- stq_be_p(haddr, val);
38
- break;
39
- case MO_LEUQ:
40
- stq_le_p(haddr, val);
41
- break;
42
- default:
43
- qemu_build_not_reached();
44
- }
45
-}
46
-
47
/**
48
* do_st_mmio_leN:
49
* @env: cpu context
50
@@ -XXX,XX +XXX,XX @@ static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
51
return val_le;
52
}
53
54
-/**
55
- * do_st_bytes_leN:
56
- * @p: translation parameters
57
- * @val_le: data to store
58
- *
59
- * Store @p->size bytes at @p->haddr, which is RAM.
60
- * The bytes to store are extracted in little-endian order from @val_le;
61
- * return the bytes of @val_le beyond @p->size that have not been stored.
62
- */
63
-static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le)
64
-{
65
- uint8_t *haddr = p->haddr;
66
- int i, size = p->size;
67
-
68
- for (i = 0; i < size; i++, val_le >>= 8) {
69
- haddr[i] = val_le;
70
- }
71
- return val_le;
72
-}
73
-
74
/*
75
* Wrapper for the above.
76
*/
77
static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
78
- uint64_t val_le, int mmu_idx, uintptr_t ra)
79
+ uint64_t val_le, int mmu_idx,
80
+ MemOp mop, uintptr_t ra)
81
{
82
+ MemOp atom;
83
+ unsigned tmp, half_size;
84
+
85
if (unlikely(p->flags & TLB_MMIO)) {
86
return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
87
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
88
return val_le >> (p->size * 8);
89
- } else {
90
- return do_st_bytes_leN(p, val_le);
91
+ }
92
+
93
+ /*
94
+ * It is a given that we cross a page and therefore there is no atomicity
95
+ * for the store as a whole, but subobjects may need attention.
96
+ */
97
+ atom = mop & MO_ATOM_MASK;
98
+ switch (atom) {
99
+ case MO_ATOM_SUBALIGN:
100
+ return store_parts_leN(p->haddr, p->size, val_le);
101
+
102
+ case MO_ATOM_IFALIGN_PAIR:
103
+ case MO_ATOM_WITHIN16_PAIR:
104
+ tmp = mop & MO_SIZE;
105
+ tmp = tmp ? tmp - 1 : 0;
106
+ half_size = 1 << tmp;
107
+ if (atom == MO_ATOM_IFALIGN_PAIR
108
+ ? p->size == half_size
109
+ : p->size >= half_size) {
110
+ if (!HAVE_al8_fast && p->size <= 4) {
111
+ return store_whole_le4(p->haddr, p->size, val_le);
112
+ } else if (HAVE_al8) {
113
+ return store_whole_le8(p->haddr, p->size, val_le);
114
+ } else {
115
+ cpu_loop_exit_atomic(env_cpu(env), ra);
116
+ }
117
+ }
118
+ /* fall through */
119
+
120
+ case MO_ATOM_IFALIGN:
121
+ case MO_ATOM_WITHIN16:
122
+ case MO_ATOM_NONE:
123
+ return store_bytes_leN(p->haddr, p->size, val_le);
124
+
125
+ default:
126
+ g_assert_not_reached();
127
}
128
}
129
130
@@ -XXX,XX +XXX,XX @@ static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
131
if (memop & MO_BSWAP) {
132
val = bswap16(val);
133
}
134
- store_memop(p->haddr, val, MO_UW);
135
+ store_atom_2(env, ra, p->haddr, memop, val);
136
}
137
}
138
139
@@ -XXX,XX +XXX,XX @@ static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
140
if (memop & MO_BSWAP) {
141
val = bswap32(val);
142
}
143
- store_memop(p->haddr, val, MO_UL);
144
+ store_atom_4(env, ra, p->haddr, memop, val);
145
}
146
}
147
148
@@ -XXX,XX +XXX,XX @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
149
if (memop & MO_BSWAP) {
150
val = bswap64(val);
151
}
152
- store_memop(p->haddr, val, MO_UQ);
153
+ store_atom_8(env, ra, p->haddr, memop, val);
154
}
155
}
156
157
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
158
if ((l.memop & MO_BSWAP) != MO_LE) {
159
val = bswap32(val);
160
}
161
- val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
162
- (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
163
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
164
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
165
}
166
167
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
168
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
169
if ((l.memop & MO_BSWAP) != MO_LE) {
170
val = bswap64(val);
171
}
172
- val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
173
- (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
174
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
175
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
176
}
177
178
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
179
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
180
index XXXXXXX..XXXXXXX 100644
181
--- a/accel/tcg/user-exec.c
182
+++ b/accel/tcg/user-exec.c
183
@@ -XXX,XX +XXX,XX @@ void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
184
185
validate_memop(oi, MO_BEUW);
186
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
187
- stw_be_p(haddr, val);
188
+ store_atom_2(env, ra, haddr, get_memop(oi), be16_to_cpu(val));
189
clear_helper_retaddr();
190
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
191
}
192
@@ -XXX,XX +XXX,XX @@ void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
193
194
validate_memop(oi, MO_BEUL);
195
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
196
- stl_be_p(haddr, val);
197
+ store_atom_4(env, ra, haddr, get_memop(oi), be32_to_cpu(val));
198
clear_helper_retaddr();
199
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
200
}
201
@@ -XXX,XX +XXX,XX @@ void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
202
203
validate_memop(oi, MO_BEUQ);
204
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
205
- stq_be_p(haddr, val);
206
+ store_atom_8(env, ra, haddr, get_memop(oi), be64_to_cpu(val));
207
clear_helper_retaddr();
208
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
209
}
210
@@ -XXX,XX +XXX,XX @@ void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
211
212
validate_memop(oi, MO_LEUW);
213
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
214
- stw_le_p(haddr, val);
215
+ store_atom_2(env, ra, haddr, get_memop(oi), le16_to_cpu(val));
216
clear_helper_retaddr();
217
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
218
}
219
@@ -XXX,XX +XXX,XX @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
220
221
validate_memop(oi, MO_LEUL);
222
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
223
- stl_le_p(haddr, val);
224
+ store_atom_4(env, ra, haddr, get_memop(oi), le32_to_cpu(val));
225
clear_helper_retaddr();
226
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
227
}
228
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
229
230
validate_memop(oi, MO_LEUQ);
231
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
232
- stq_le_p(haddr, val);
233
+ store_atom_8(env, ra, haddr, get_memop(oi), le64_to_cpu(val));
234
clear_helper_retaddr();
235
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
236
}
237
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
238
index XXXXXXX..XXXXXXX 100644
239
--- a/accel/tcg/ldst_atomicity.c.inc
240
+++ b/accel/tcg/ldst_atomicity.c.inc
241
@@ -XXX,XX +XXX,XX @@
242
#else
243
# define HAVE_al16_fast false
244
#endif
245
+#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
246
+# define HAVE_al16 true
247
+#else
248
+# define HAVE_al16 false
249
+#endif
250
+
251
252
/**
253
* required_atomicity:
254
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
255
g_assert_not_reached();
256
}
257
}
258
+
259
+/**
260
+ * store_atomic2:
261
+ * @pv: host address
262
+ * @val: value to store
263
+ *
264
+ * Atomically store 2 aligned bytes to @pv.
265
+ */
266
+static inline void store_atomic2(void *pv, uint16_t val)
267
+{
268
+ uint16_t *p = __builtin_assume_aligned(pv, 2);
269
+ qatomic_set(p, val);
270
+}
271
+
272
+/**
273
+ * store_atomic4:
274
+ * @pv: host address
275
+ * @val: value to store
276
+ *
277
+ * Atomically store 4 aligned bytes to @pv.
278
+ */
279
+static inline void store_atomic4(void *pv, uint32_t val)
280
+{
281
+ uint32_t *p = __builtin_assume_aligned(pv, 4);
282
+ qatomic_set(p, val);
283
+}
284
+
285
+/**
286
+ * store_atomic8:
287
+ * @pv: host address
288
+ * @val: value to store
289
+ *
290
+ * Atomically store 8 aligned bytes to @pv.
291
+ */
292
+static inline void store_atomic8(void *pv, uint64_t val)
293
+{
294
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
295
+
296
+ qemu_build_assert(HAVE_al8);
297
+ qatomic_set__nocheck(p, val);
298
+}
299
+
300
+/**
301
+ * store_atom_4_by_2
302
+ */
303
+static inline void store_atom_4_by_2(void *pv, uint32_t val)
304
+{
305
+ store_atomic2(pv, val >> (HOST_BIG_ENDIAN ? 16 : 0));
306
+ store_atomic2(pv + 2, val >> (HOST_BIG_ENDIAN ? 0 : 16));
307
+}
308
+
309
+/**
310
+ * store_atom_8_by_2
311
+ */
312
+static inline void store_atom_8_by_2(void *pv, uint64_t val)
313
+{
314
+ store_atom_4_by_2(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
315
+ store_atom_4_by_2(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
316
+}
317
+
318
+/**
319
+ * store_atom_8_by_4
320
+ */
321
+static inline void store_atom_8_by_4(void *pv, uint64_t val)
322
+{
323
+ store_atomic4(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
324
+ store_atomic4(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
325
+}
326
+
327
+/**
328
+ * store_atom_insert_al4:
329
+ * @p: host address
330
+ * @val: shifted value to store
331
+ * @msk: mask for value to store
332
+ *
333
+ * Atomically store @val to @p, masked by @msk.
334
+ */
335
+static void store_atom_insert_al4(uint32_t *p, uint32_t val, uint32_t msk)
336
+{
337
+ uint32_t old, new;
338
+
339
+ p = __builtin_assume_aligned(p, 4);
340
+ old = qatomic_read(p);
341
+ do {
342
+ new = (old & ~msk) | val;
343
+ } while (!__atomic_compare_exchange_n(p, &old, new, true,
344
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
345
+}
346
+
347
+/**
348
+ * store_atom_insert_al8:
349
+ * @p: host address
350
+ * @val: shifted value to store
351
+ * @msk: mask for value to store
352
+ *
353
+ * Atomically store @val to @p masked by @msk.
354
+ */
355
+static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
356
+{
357
+ uint64_t old, new;
358
+
359
+ qemu_build_assert(HAVE_al8);
360
+ p = __builtin_assume_aligned(p, 8);
361
+ old = qatomic_read__nocheck(p);
362
+ do {
363
+ new = (old & ~msk) | val;
364
+ } while (!__atomic_compare_exchange_n(p, &old, new, true,
365
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
366
+}
367
+
368
+/**
369
+ * store_atom_insert_al16:
370
+ * @ps: host address
371
+ * @val: shifted value to store
372
+ * @msk: mask for value to store
373
+ *
374
+ * Atomically store @val to @ps masked by @msk.
375
+ */
376
+static void store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
377
+{
378
+#if defined(CONFIG_ATOMIC128)
379
+ __uint128_t *pu, old, new;
380
+
381
+ /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
382
+ pu = __builtin_assume_aligned(ps, 16);
383
+ old = *pu;
384
+ do {
385
+ new = (old & ~msk.u) | val.u;
386
+ } while (!__atomic_compare_exchange_n(pu, &old, new, true,
387
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
388
+#elif defined(CONFIG_CMPXCHG128)
389
+ __uint128_t *pu, old, new;
390
+
391
+ /*
392
+ * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
393
+ * defer to libatomic, so we must use __sync_*_compare_and_swap_16
394
+ * and accept the sequential consistency that comes with it.
395
+ */
396
+ pu = __builtin_assume_aligned(ps, 16);
397
+ do {
398
+ old = *pu;
399
+ new = (old & ~msk.u) | val.u;
400
+ } while (!__sync_bool_compare_and_swap_16(pu, old, new));
401
+#else
402
+ qemu_build_not_reached();
403
+#endif
404
+}
405
+
406
+/**
407
+ * store_bytes_leN:
408
+ * @pv: host address
409
+ * @size: number of bytes to store
410
+ * @val_le: data to store
411
+ *
412
+ * Store @size bytes at @pv. The bytes to store are extracted in little-endian order
413
+ * from @val_le; return the bytes of @val_le beyond @size that have not been stored.
414
+ */
415
+static uint64_t store_bytes_leN(void *pv, int size, uint64_t val_le)
416
+{
417
+ uint8_t *p = pv;
418
+ for (int i = 0; i < size; i++, val_le >>= 8) {
419
+ p[i] = val_le;
420
+ }
421
+ return val_le;
422
+}
423
+
424
+/**
425
+ * store_parts_leN
426
+ * @pv: host address
427
+ * @size: number of bytes to store
428
+ * @val_le: data to store
429
+ *
430
+ * As store_bytes_leN, but atomically on each aligned part.
431
+ */
432
+G_GNUC_UNUSED
433
+static uint64_t store_parts_leN(void *pv, int size, uint64_t val_le)
434
+{
435
+ do {
436
+ int n;
437
+
438
+ /* Find minimum of alignment and size */
439
+ switch (((uintptr_t)pv | size) & 7) {
440
+ case 4:
441
+ store_atomic4(pv, le32_to_cpu(val_le));
442
+ val_le >>= 32;
443
+ n = 4;
444
+ break;
445
+ case 2:
446
+ case 6:
447
+ store_atomic2(pv, le16_to_cpu(val_le));
448
+ val_le >>= 16;
449
+ n = 2;
450
+ break;
451
+ default:
452
+ *(uint8_t *)pv = val_le;
453
+ val_le >>= 8;
454
+ n = 1;
455
+ break;
456
+ case 0:
457
+ g_assert_not_reached();
458
+ }
459
+ pv += n;
460
+ size -= n;
461
+ } while (size != 0);
462
+
463
+ return val_le;
464
+}
465
+
466
+/**
467
+ * store_whole_le4
468
+ * @pv: host address
469
+ * @size: number of bytes to store
470
+ * @val_le: data to store
471
+ *
472
+ * As store_bytes_leN, but atomically as a whole.
473
+ * Four aligned bytes are guaranteed to cover the store.
474
+ */
475
+static uint64_t store_whole_le4(void *pv, int size, uint64_t val_le)
476
+{
477
+ int sz = size * 8;
478
+ int o = (uintptr_t)pv & 3;
479
+ int sh = o * 8;
480
+ uint32_t m = MAKE_64BIT_MASK(0, sz);
481
+ uint32_t v;
482
+
483
+ if (HOST_BIG_ENDIAN) {
484
+ v = bswap32(val_le) >> sh;
485
+ m = bswap32(m) >> sh;
486
+ } else {
487
+ v = val_le << sh;
488
+ m <<= sh;
489
+ }
490
+ store_atom_insert_al4(pv - o, v, m);
491
+ return val_le >> sz;
492
+}
493
+
494
+/**
495
+ * store_whole_le8
496
+ * @pv: host address
497
+ * @size: number of bytes to store
498
+ * @val_le: data to store
499
+ *
500
+ * As store_bytes_leN, but atomically as a whole.
501
+ * Eight aligned bytes are guaranteed to cover the store.
502
+ */
503
+static uint64_t store_whole_le8(void *pv, int size, uint64_t val_le)
504
+{
505
+ int sz = size * 8;
506
+ int o = (uintptr_t)pv & 7;
507
+ int sh = o * 8;
508
+ uint64_t m = MAKE_64BIT_MASK(0, sz);
509
+ uint64_t v;
510
+
511
+ qemu_build_assert(HAVE_al8);
512
+ if (HOST_BIG_ENDIAN) {
513
+ v = bswap64(val_le) >> sh;
514
+ m = bswap64(m) >> sh;
515
+ } else {
516
+ v = val_le << sh;
517
+ m <<= sh;
518
+ }
519
+ store_atom_insert_al8(pv - o, v, m);
520
+ return val_le >> sz;
521
+}
522
+
523
+/**
524
+ * store_whole_le16
525
+ * @pv: host address
526
+ * @size: number of bytes to store
527
+ * @val_le: data to store
528
+ *
529
+ * As store_bytes_leN, but atomically as a whole.
530
+ * 16 aligned bytes are guaranteed to cover the store.
531
+ */
532
+static uint64_t store_whole_le16(void *pv, int size, Int128 val_le)
533
+{
534
+ int sz = size * 8;
535
+ int o = (uintptr_t)pv & 15;
536
+ int sh = o * 8;
537
+ Int128 m, v;
538
+
539
+ qemu_build_assert(HAVE_al16);
540
+
541
+ /* Like MAKE_64BIT_MASK(0, sz), but larger. */
542
+ if (sz <= 64) {
543
+ m = int128_make64(MAKE_64BIT_MASK(0, sz));
544
+ } else {
545
+ m = int128_make128(-1, MAKE_64BIT_MASK(0, sz - 64));
546
+ }
547
+
548
+ if (HOST_BIG_ENDIAN) {
549
+ v = int128_urshift(bswap128(val_le), sh);
550
+ m = int128_urshift(bswap128(m), sh);
551
+ } else {
552
+ v = int128_lshift(val_le, sh);
553
+ m = int128_lshift(m, sh);
554
+ }
555
+ store_atom_insert_al16(pv - o, v, m);
556
+
557
+ /* Unused if sz <= 64. */
558
+ return int128_gethi(val_le) >> (sz - 64);
559
+}
560
+
561
+/**
562
+ * store_atom_2:
563
+ * @pv: host address
564
+ * @val: the value to store
565
+ * @memop: the full memory op
566
+ *
567
+ * Store 2 bytes to @pv, honoring the atomicity of @memop.
568
+ */
569
+static void store_atom_2(CPUArchState *env, uintptr_t ra,
570
+ void *pv, MemOp memop, uint16_t val)
571
+{
572
+ uintptr_t pi = (uintptr_t)pv;
573
+ int atmax;
574
+
575
+ if (likely((pi & 1) == 0)) {
576
+ store_atomic2(pv, val);
577
+ return;
578
+ }
579
+
580
+ atmax = required_atomicity(env, pi, memop);
581
+ if (atmax == MO_8) {
582
+ stw_he_p(pv, val);
583
+ return;
584
+ }
585
+
586
+ /*
587
+ * The only case remaining is MO_ATOM_WITHIN16.
588
+ * Big or little endian, we want the middle two bytes in each test.
589
+ */
590
+ if ((pi & 3) == 1) {
591
+ store_atom_insert_al4(pv - 1, (uint32_t)val << 8, MAKE_64BIT_MASK(8, 16));
592
+ return;
593
+ } else if ((pi & 7) == 3) {
594
+ if (HAVE_al8) {
595
+ store_atom_insert_al8(pv - 3, (uint64_t)val << 24, MAKE_64BIT_MASK(24, 16));
596
+ return;
597
+ }
598
+ } else if ((pi & 15) == 7) {
599
+ if (HAVE_al16) {
600
+ Int128 v = int128_lshift(int128_make64(val), 56);
601
+ Int128 m = int128_lshift(int128_make64(0xffff), 56);
602
+ store_atom_insert_al16(pv - 7, v, m);
603
+ return;
604
+ }
605
+ } else {
606
+ g_assert_not_reached();
607
+ }
608
+
609
+ cpu_loop_exit_atomic(env_cpu(env), ra);
610
+}
611
+
612
+/**
613
+ * store_atom_4:
614
+ * @pv: host address
615
+ * @val: the value to store
616
+ * @memop: the full memory op
617
+ *
618
+ * Store 4 bytes to @pv, honoring the atomicity of @memop.
619
+ */
620
+static void store_atom_4(CPUArchState *env, uintptr_t ra,
621
+ void *pv, MemOp memop, uint32_t val)
622
+{
623
+ uintptr_t pi = (uintptr_t)pv;
624
+ int atmax;
625
+
626
+ if (likely((pi & 3) == 0)) {
627
+ store_atomic4(pv, val);
628
+ return;
629
+ }
630
+
631
+ atmax = required_atomicity(env, pi, memop);
632
+ switch (atmax) {
633
+ case MO_8:
634
+ stl_he_p(pv, val);
635
+ return;
636
+ case MO_16:
637
+ store_atom_4_by_2(pv, val);
638
+ return;
639
+ case -MO_16:
640
+ {
641
+ uint32_t val_le = cpu_to_le32(val);
642
+ int s2 = pi & 3;
643
+ int s1 = 4 - s2;
644
+
645
+ switch (s2) {
646
+ case 1:
647
+ val_le = store_whole_le4(pv, s1, val_le);
648
+ *(uint8_t *)(pv + 3) = val_le;
649
+ break;
650
+ case 3:
651
+ *(uint8_t *)pv = val_le;
652
+ store_whole_le4(pv + 1, s2, val_le >> 8);
653
+ break;
654
+ case 0: /* aligned */
655
+ case 2: /* atmax MO_16 */
656
+ default:
657
+ g_assert_not_reached();
658
+ }
659
+ }
660
+ return;
661
+ case MO_32:
662
+ if ((pi & 7) < 4) {
663
+ if (HAVE_al8) {
664
+ store_whole_le8(pv, 4, cpu_to_le32(val));
665
+ return;
666
+ }
667
+ } else {
668
+ if (HAVE_al16) {
669
+ store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
670
+ return;
671
+ }
672
+ }
673
+ cpu_loop_exit_atomic(env_cpu(env), ra);
674
+ default:
675
+ g_assert_not_reached();
676
+ }
677
+}
678
+
679
+/**
680
+ * store_atom_8:
681
+ * @pv: host address
682
+ * @val: the value to store
683
+ * @memop: the full memory op
684
+ *
685
+ * Store 8 bytes to @pv, honoring the atomicity of @memop.
686
+ */
687
+static void store_atom_8(CPUArchState *env, uintptr_t ra,
688
+ void *pv, MemOp memop, uint64_t val)
689
+{
690
+ uintptr_t pi = (uintptr_t)pv;
691
+ int atmax;
692
+
693
+ if (HAVE_al8 && likely((pi & 7) == 0)) {
694
+ store_atomic8(pv, val);
695
+ return;
696
+ }
697
+
698
+ atmax = required_atomicity(env, pi, memop);
699
+ switch (atmax) {
700
+ case MO_8:
701
+ stq_he_p(pv, val);
702
+ return;
703
+ case MO_16:
704
+ store_atom_8_by_2(pv, val);
705
+ return;
706
+ case MO_32:
707
+ store_atom_8_by_4(pv, val);
708
+ return;
709
+ case -MO_32:
710
+ if (HAVE_al8) {
711
+ uint64_t val_le = cpu_to_le64(val);
712
+ int s2 = pi & 7;
713
+ int s1 = 8 - s2;
714
+
715
+ switch (s2) {
716
+ case 1 ... 3:
717
+ val_le = store_whole_le8(pv, s1, val_le);
718
+ store_bytes_leN(pv + s1, s2, val_le);
719
+ break;
720
+ case 5 ... 7:
721
+ val_le = store_bytes_leN(pv, s1, val_le);
722
+ store_whole_le8(pv + s1, s2, val_le);
723
+ break;
724
+ case 0: /* aligned */
725
+ case 4: /* atmax MO_32 */
726
+ default:
727
+ g_assert_not_reached();
728
+ }
729
+ return;
730
+ }
731
+ break;
732
+ case MO_64:
733
+ if (HAVE_al16) {
734
+ store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
735
+ return;
736
+ }
737
+ break;
738
+ default:
739
+ g_assert_not_reached();
740
+ }
741
+ cpu_loop_exit_atomic(env_cpu(env), ra);
742
+}
743
--
744
2.34.1
New patch
1
With the current structure of cputlb.c, there is no difference
2
between the little-endian and big-endian entry points, aside
3
from the assert. Unify the pairs of functions.
1
4
5
Hoist the qemu_{ld,st}_helpers arrays to tcg.c.
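For illustration, a minimal sketch of the dispatch this enables, modelled
on the tci.c hunk below (a hypothetical wrapper, not code added by this
patch): the backend selects the helper by size alone, and the helper itself
honors the endianness encoded in the MemOpIdx.

    static void do_store_slow(CPUArchState *env, target_ulong addr,
                              uint64_t val, MemOpIdx oi, uintptr_t ra)
    {
        /* Index by MO_SIZE only; MO_BSWAP is handled inside the helper. */
        switch (get_memop(oi) & MO_SIZE) {
        case MO_8:
            helper_stb_mmu(env, addr, val, oi, ra);
            break;
        case MO_16:
            helper_stw_mmu(env, addr, val, oi, ra);
            break;
        case MO_32:
            helper_stl_mmu(env, addr, val, oi, ra);
            break;
        case MO_64:
            helper_stq_mmu(env, addr, val, oi, ra);
            break;
        default:
            g_assert_not_reached();
        }
    }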
6
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
docs/devel/loads-stores.rst | 36 ++----
12
include/tcg/tcg-ldst.h | 60 ++++------
13
accel/tcg/cputlb.c | 190 ++++++++++---------------------
14
tcg/tcg.c | 21 ++++
15
tcg/tci.c | 61 ++++------
16
tcg/aarch64/tcg-target.c.inc | 33 ------
17
tcg/arm/tcg-target.c.inc | 37 ------
18
tcg/i386/tcg-target.c.inc | 30 +----
19
tcg/loongarch64/tcg-target.c.inc | 23 ----
20
tcg/mips/tcg-target.c.inc | 31 -----
21
tcg/ppc/tcg-target.c.inc | 30 +----
22
tcg/riscv/tcg-target.c.inc | 42 -------
23
tcg/s390x/tcg-target.c.inc | 31 +----
24
tcg/sparc64/tcg-target.c.inc | 32 +-----
25
14 files changed, 146 insertions(+), 511 deletions(-)
26
27
diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
28
index XXXXXXX..XXXXXXX 100644
29
--- a/docs/devel/loads-stores.rst
30
+++ b/docs/devel/loads-stores.rst
31
@@ -XXX,XX +XXX,XX @@ swap: ``translator_ld{sign}{size}_swap(env, ptr, swap)``
32
Regexes for git grep
33
- ``\<translator_ld[us]\?[bwlq]\(_swap\)\?\>``
34
35
-``helper_*_{ld,st}*_mmu``
36
+``helper_{ld,st}*_mmu``
37
~~~~~~~~~~~~~~~~~~~~~~~~~
38
39
These functions are intended primarily to be called by the code
40
-generated by the TCG backend. They may also be called by target
41
-CPU helper function code. Like the ``cpu_{ld,st}_mmuidx_ra`` functions
42
-they perform accesses by guest virtual address, with a given ``mmuidx``.
43
+generated by the TCG backend. Like the ``cpu_{ld,st}_mmu`` functions
44
+they perform accesses by guest virtual address, with a given ``MemOpIdx``.
45
46
-These functions specify an ``opindex`` parameter which encodes
47
-(among other things) the mmu index to use for the access. This parameter
48
-should be created by calling ``make_memop_idx()``.
49
+They differ from ``cpu_{ld,st}_mmu`` in that they take the endianness
50
+of the operation only from the MemOpIdx, and loads extend the return
51
+value to the size of a host general register (``tcg_target_ulong``).
52
53
-The ``retaddr`` parameter should be the result of GETPC() called directly
54
-from the top level HELPER(foo) function (or 0 if no guest CPU state
55
-unwinding is required).
56
+load: ``helper_ld{sign}{size}_mmu(env, addr, opindex, retaddr)``
57
58
-**TODO** The names of these functions are a bit odd for historical
59
-reasons because they were originally expected to be called only from
60
-within generated code. We should rename them to bring them more in
61
-line with the other memory access functions. The explicit endianness
62
-is the only feature they have beyond ``*_mmuidx_ra``.
63
-
64
-load: ``helper_{endian}_ld{sign}{size}_mmu(env, addr, opindex, retaddr)``
65
-
66
-store: ``helper_{endian}_st{size}_mmu(env, addr, val, opindex, retaddr)``
67
+store: ``helper_st{size}_mmu(env, addr, val, opindex, retaddr)``
68
69
``sign``
70
- (empty) : for 32 or 64 bit sizes
71
@@ -XXX,XX +XXX,XX @@ store: ``helper_{endian}_st{size}_mmu(env, addr, val, opindex, retaddr)``
72
- ``l`` : 32 bits
73
- ``q`` : 64 bits
74
75
-``endian``
76
- - ``le`` : little endian
77
- - ``be`` : big endian
78
- - ``ret`` : target endianness
79
-
80
Regexes for git grep
81
- - ``\<helper_\(le\|be\|ret\)_ld[us]\?[bwlq]_mmu\>``
82
- - ``\<helper_\(le\|be\|ret\)_st[bwlq]_mmu\>``
83
+ - ``\<helper_ld[us]\?[bwlq]_mmu\>``
84
+ - ``\<helper_st[bwlq]_mmu\>``
85
86
``address_space_*``
87
~~~~~~~~~~~~~~~~~~~
88
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
89
index XXXXXXX..XXXXXXX 100644
90
--- a/include/tcg/tcg-ldst.h
91
+++ b/include/tcg/tcg-ldst.h
92
@@ -XXX,XX +XXX,XX @@
93
#ifdef CONFIG_SOFTMMU
94
95
/* Value zero-extended to tcg register size. */
96
-tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
97
- MemOpIdx oi, uintptr_t retaddr);
98
-tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
99
- MemOpIdx oi, uintptr_t retaddr);
100
-tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
101
- MemOpIdx oi, uintptr_t retaddr);
102
-uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
103
- MemOpIdx oi, uintptr_t retaddr);
104
-tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
105
- MemOpIdx oi, uintptr_t retaddr);
106
-tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
107
- MemOpIdx oi, uintptr_t retaddr);
108
-uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
109
- MemOpIdx oi, uintptr_t retaddr);
110
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
111
+ MemOpIdx oi, uintptr_t retaddr);
112
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
113
+ MemOpIdx oi, uintptr_t retaddr);
114
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
115
+ MemOpIdx oi, uintptr_t retaddr);
116
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
117
+ MemOpIdx oi, uintptr_t retaddr);
118
119
/* Value sign-extended to tcg register size. */
120
-tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
121
- MemOpIdx oi, uintptr_t retaddr);
122
-tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
123
- MemOpIdx oi, uintptr_t retaddr);
124
-tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
125
- MemOpIdx oi, uintptr_t retaddr);
126
-tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
127
- MemOpIdx oi, uintptr_t retaddr);
128
-tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
129
- MemOpIdx oi, uintptr_t retaddr);
130
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
131
+ MemOpIdx oi, uintptr_t retaddr);
132
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
133
+ MemOpIdx oi, uintptr_t retaddr);
134
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
135
+ MemOpIdx oi, uintptr_t retaddr);
136
137
/*
138
* Value extended to at least uint32_t, so that some ABIs do not require
139
* zero-extension from uint8_t or uint16_t.
140
*/
141
-void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
142
- MemOpIdx oi, uintptr_t retaddr);
143
-void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
144
- MemOpIdx oi, uintptr_t retaddr);
145
-void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
146
- MemOpIdx oi, uintptr_t retaddr);
147
-void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
148
- MemOpIdx oi, uintptr_t retaddr);
149
-void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
150
- MemOpIdx oi, uintptr_t retaddr);
151
-void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
152
- MemOpIdx oi, uintptr_t retaddr);
153
-void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
154
- MemOpIdx oi, uintptr_t retaddr);
155
+void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
156
+ MemOpIdx oi, uintptr_t retaddr);
157
+void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
158
+ MemOpIdx oi, uintptr_t retaddr);
159
+void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
160
+ MemOpIdx oi, uintptr_t retaddr);
161
+void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
162
+ MemOpIdx oi, uintptr_t retaddr);
163
164
#else
165
166
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
167
index XXXXXXX..XXXXXXX 100644
168
--- a/accel/tcg/cputlb.c
169
+++ b/accel/tcg/cputlb.c
170
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
171
cpu_loop_exit_atomic(env_cpu(env), retaddr);
172
}
173
174
-/*
175
- * Verify that we have passed the correct MemOp to the correct function.
176
- *
177
- * In the case of the helper_*_mmu functions, we will have done this by
178
- * using the MemOp to look up the helper during code generation.
179
- *
180
- * In the case of the cpu_*_mmu functions, this is up to the caller.
181
- * We could present one function to target code, and dispatch based on
182
- * the MemOp, but so far we have worked hard to avoid an indirect function
183
- * call along the memory path.
184
- */
185
-static void validate_memop(MemOpIdx oi, MemOp expected)
186
-{
187
-#ifdef CONFIG_DEBUG_TCG
188
- MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP);
189
- assert(have == expected);
190
-#endif
191
-}
192
-
193
/*
194
* Load Helpers
195
*
196
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
197
return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
198
}
199
200
-tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
201
- MemOpIdx oi, uintptr_t retaddr)
202
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
203
+ MemOpIdx oi, uintptr_t retaddr)
204
{
205
- validate_memop(oi, MO_UB);
206
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
207
return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
208
}
209
210
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
211
return ret;
212
}
213
214
-tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
215
- MemOpIdx oi, uintptr_t retaddr)
216
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
217
+ MemOpIdx oi, uintptr_t retaddr)
218
{
219
- validate_memop(oi, MO_LEUW);
220
- return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
221
-}
222
-
223
-tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
224
- MemOpIdx oi, uintptr_t retaddr)
225
-{
226
- validate_memop(oi, MO_BEUW);
227
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
228
return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
229
}
230
231
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
232
return ret;
233
}
234
235
-tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
236
- MemOpIdx oi, uintptr_t retaddr)
237
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
238
+ MemOpIdx oi, uintptr_t retaddr)
239
{
240
- validate_memop(oi, MO_LEUL);
241
- return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
242
-}
243
-
244
-tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
245
- MemOpIdx oi, uintptr_t retaddr)
246
-{
247
- validate_memop(oi, MO_BEUL);
248
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
249
return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
250
}
251
252
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
253
return ret;
254
}
255
256
-uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
257
- MemOpIdx oi, uintptr_t retaddr)
258
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
259
+ MemOpIdx oi, uintptr_t retaddr)
260
{
261
- validate_memop(oi, MO_LEUQ);
262
- return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
263
-}
264
-
265
-uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
266
- MemOpIdx oi, uintptr_t retaddr)
267
-{
268
- validate_memop(oi, MO_BEUQ);
269
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
270
return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
271
}
272
273
@@ -XXX,XX +XXX,XX @@ uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
274
* avoid this for 64-bit data, or for 32-bit data on 32-bit host.
275
*/
276
277
-
278
-tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
279
- MemOpIdx oi, uintptr_t retaddr)
280
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
281
+ MemOpIdx oi, uintptr_t retaddr)
282
{
283
- return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
284
+ return (int8_t)helper_ldub_mmu(env, addr, oi, retaddr);
285
}
286
287
-tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
288
- MemOpIdx oi, uintptr_t retaddr)
289
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
290
+ MemOpIdx oi, uintptr_t retaddr)
291
{
292
- return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
293
+ return (int16_t)helper_lduw_mmu(env, addr, oi, retaddr);
294
}
295
296
-tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
297
- MemOpIdx oi, uintptr_t retaddr)
298
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
299
+ MemOpIdx oi, uintptr_t retaddr)
300
{
301
- return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
302
-}
303
-
304
-tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
305
- MemOpIdx oi, uintptr_t retaddr)
306
-{
307
- return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
308
-}
309
-
310
-tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
311
- MemOpIdx oi, uintptr_t retaddr)
312
-{
313
- return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
314
+ return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
315
}
316
317
/*
318
@@ -XXX,XX +XXX,XX @@ uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
319
{
320
uint8_t ret;
321
322
- validate_memop(oi, MO_UB);
323
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_UB);
324
ret = do_ld1_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
325
plugin_load_cb(env, addr, oi);
326
return ret;
327
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
328
{
329
uint16_t ret;
330
331
- validate_memop(oi, MO_BEUW);
332
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW);
333
ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
334
plugin_load_cb(env, addr, oi);
335
return ret;
336
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
337
{
338
uint32_t ret;
339
340
- validate_memop(oi, MO_BEUL);
341
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL);
342
ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
343
plugin_load_cb(env, addr, oi);
344
return ret;
345
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
346
{
347
uint64_t ret;
348
349
- validate_memop(oi, MO_BEUQ);
350
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ);
351
ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
352
plugin_load_cb(env, addr, oi);
353
return ret;
354
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
355
{
356
uint16_t ret;
357
358
- validate_memop(oi, MO_LEUW);
359
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
360
ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
361
plugin_load_cb(env, addr, oi);
362
return ret;
363
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
364
{
365
uint32_t ret;
366
367
- validate_memop(oi, MO_LEUL);
368
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
369
ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
370
plugin_load_cb(env, addr, oi);
371
return ret;
372
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
373
{
374
uint64_t ret;
375
376
- validate_memop(oi, MO_LEUQ);
377
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
378
ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
379
plugin_load_cb(env, addr, oi);
380
return ret;
381
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
382
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
383
new_oi = make_memop_idx(mop, mmu_idx);
384
385
- h = helper_be_ldq_mmu(env, addr, new_oi, ra);
386
- l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra);
387
+ h = helper_ldq_mmu(env, addr, new_oi, ra);
388
+ l = helper_ldq_mmu(env, addr + 8, new_oi, ra);
389
390
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
391
return int128_make128(l, h);
392
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
393
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
394
new_oi = make_memop_idx(mop, mmu_idx);
395
396
- l = helper_le_ldq_mmu(env, addr, new_oi, ra);
397
- h = helper_le_ldq_mmu(env, addr + 8, new_oi, ra);
398
+ l = helper_ldq_mmu(env, addr, new_oi, ra);
399
+ h = helper_ldq_mmu(env, addr + 8, new_oi, ra);
400
401
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
402
return int128_make128(l, h);
403
@@ -XXX,XX +XXX,XX @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
404
}
405
}
406
407
-void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
408
- MemOpIdx oi, uintptr_t ra)
409
+void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
410
+ MemOpIdx oi, uintptr_t ra)
411
{
412
MMULookupLocals l;
413
bool crosspage;
414
415
- validate_memop(oi, MO_UB);
416
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
417
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
418
tcg_debug_assert(!crosspage);
419
420
@@ -XXX,XX +XXX,XX @@ static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
421
do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
422
}
423
424
-void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
425
- MemOpIdx oi, uintptr_t retaddr)
426
+void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
427
+ MemOpIdx oi, uintptr_t retaddr)
428
{
429
- validate_memop(oi, MO_LEUW);
430
- do_st2_mmu(env, addr, val, oi, retaddr);
431
-}
432
-
433
-void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
434
- MemOpIdx oi, uintptr_t retaddr)
435
-{
436
- validate_memop(oi, MO_BEUW);
437
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
438
do_st2_mmu(env, addr, val, oi, retaddr);
439
}
440
441
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
442
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
443
}
444
445
-void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
446
- MemOpIdx oi, uintptr_t retaddr)
447
+void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
448
+ MemOpIdx oi, uintptr_t retaddr)
449
{
450
- validate_memop(oi, MO_LEUL);
451
- do_st4_mmu(env, addr, val, oi, retaddr);
452
-}
453
-
454
-void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
455
- MemOpIdx oi, uintptr_t retaddr)
456
-{
457
- validate_memop(oi, MO_BEUL);
458
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
459
do_st4_mmu(env, addr, val, oi, retaddr);
460
}
461
462
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
463
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
464
}
465
466
-void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
467
- MemOpIdx oi, uintptr_t retaddr)
468
+void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
469
+ MemOpIdx oi, uintptr_t retaddr)
470
{
471
- validate_memop(oi, MO_LEUQ);
472
- do_st8_mmu(env, addr, val, oi, retaddr);
473
-}
474
-
475
-void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
476
- MemOpIdx oi, uintptr_t retaddr)
477
-{
478
- validate_memop(oi, MO_BEUQ);
479
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
480
do_st8_mmu(env, addr, val, oi, retaddr);
481
}
482
483
@@ -XXX,XX +XXX,XX @@ static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
484
void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
485
MemOpIdx oi, uintptr_t retaddr)
486
{
487
- helper_ret_stb_mmu(env, addr, val, oi, retaddr);
488
+ helper_stb_mmu(env, addr, val, oi, retaddr);
489
plugin_store_cb(env, addr, oi);
490
}
491
492
void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
493
MemOpIdx oi, uintptr_t retaddr)
494
{
495
- helper_be_stw_mmu(env, addr, val, oi, retaddr);
496
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW);
497
+ do_st2_mmu(env, addr, val, oi, retaddr);
498
plugin_store_cb(env, addr, oi);
499
}
500
501
void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
502
MemOpIdx oi, uintptr_t retaddr)
503
{
504
- helper_be_stl_mmu(env, addr, val, oi, retaddr);
505
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL);
506
+ do_st4_mmu(env, addr, val, oi, retaddr);
507
plugin_store_cb(env, addr, oi);
508
}
509
510
void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
511
MemOpIdx oi, uintptr_t retaddr)
512
{
513
- helper_be_stq_mmu(env, addr, val, oi, retaddr);
514
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ);
515
+ do_st8_mmu(env, addr, val, oi, retaddr);
516
plugin_store_cb(env, addr, oi);
517
}
518
519
void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
520
MemOpIdx oi, uintptr_t retaddr)
521
{
522
- helper_le_stw_mmu(env, addr, val, oi, retaddr);
523
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
524
+ do_st2_mmu(env, addr, val, oi, retaddr);
525
plugin_store_cb(env, addr, oi);
526
}
527
528
void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
529
MemOpIdx oi, uintptr_t retaddr)
530
{
531
- helper_le_stl_mmu(env, addr, val, oi, retaddr);
532
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
533
+ do_st4_mmu(env, addr, val, oi, retaddr);
534
plugin_store_cb(env, addr, oi);
535
}
536
537
void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
538
MemOpIdx oi, uintptr_t retaddr)
539
{
540
- helper_le_stq_mmu(env, addr, val, oi, retaddr);
541
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
542
+ do_st8_mmu(env, addr, val, oi, retaddr);
543
plugin_store_cb(env, addr, oi);
544
}
545
546
@@ -XXX,XX +XXX,XX @@ void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
547
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
548
new_oi = make_memop_idx(mop, mmu_idx);
549
550
- helper_be_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
551
- helper_be_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
552
+ helper_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
553
+ helper_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
554
555
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
556
}
557
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
558
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
559
new_oi = make_memop_idx(mop, mmu_idx);
560
561
- helper_le_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
562
- helper_le_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
563
+ helper_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
564
+ helper_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
565
566
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
567
}
568
diff --git a/tcg/tcg.c b/tcg/tcg.c
569
index XXXXXXX..XXXXXXX 100644
570
--- a/tcg/tcg.c
571
+++ b/tcg/tcg.c
572
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
573
const TCGLdstHelperParam *p)
574
__attribute__((unused));
575
576
+#ifdef CONFIG_SOFTMMU
577
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
578
+ [MO_UB] = helper_ldub_mmu,
579
+ [MO_SB] = helper_ldsb_mmu,
580
+ [MO_UW] = helper_lduw_mmu,
581
+ [MO_SW] = helper_ldsw_mmu,
582
+ [MO_UL] = helper_ldul_mmu,
583
+ [MO_UQ] = helper_ldq_mmu,
584
+#if TCG_TARGET_REG_BITS == 64
585
+ [MO_SL] = helper_ldsl_mmu,
586
+#endif
587
+};
588
+
589
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
590
+ [MO_8] = helper_stb_mmu,
591
+ [MO_16] = helper_stw_mmu,
592
+ [MO_32] = helper_stl_mmu,
593
+ [MO_64] = helper_stq_mmu,
594
+};
595
+#endif
596
+
597
TCGContext tcg_init_ctx;
598
__thread TCGContext *tcg_ctx;
599
600
diff --git a/tcg/tci.c b/tcg/tci.c
601
index XXXXXXX..XXXXXXX 100644
602
--- a/tcg/tci.c
603
+++ b/tcg/tci.c
604
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
605
uintptr_t ra = (uintptr_t)tb_ptr;
606
607
#ifdef CONFIG_SOFTMMU
608
- switch (mop & (MO_BSWAP | MO_SSIZE)) {
609
+ switch (mop & MO_SSIZE) {
610
case MO_UB:
611
- return helper_ret_ldub_mmu(env, taddr, oi, ra);
612
+ return helper_ldub_mmu(env, taddr, oi, ra);
613
case MO_SB:
614
- return helper_ret_ldsb_mmu(env, taddr, oi, ra);
615
- case MO_LEUW:
616
- return helper_le_lduw_mmu(env, taddr, oi, ra);
617
- case MO_LESW:
618
- return helper_le_ldsw_mmu(env, taddr, oi, ra);
619
- case MO_LEUL:
620
- return helper_le_ldul_mmu(env, taddr, oi, ra);
621
- case MO_LESL:
622
- return helper_le_ldsl_mmu(env, taddr, oi, ra);
623
- case MO_LEUQ:
624
- return helper_le_ldq_mmu(env, taddr, oi, ra);
625
- case MO_BEUW:
626
- return helper_be_lduw_mmu(env, taddr, oi, ra);
627
- case MO_BESW:
628
- return helper_be_ldsw_mmu(env, taddr, oi, ra);
629
- case MO_BEUL:
630
- return helper_be_ldul_mmu(env, taddr, oi, ra);
631
- case MO_BESL:
632
- return helper_be_ldsl_mmu(env, taddr, oi, ra);
633
- case MO_BEUQ:
634
- return helper_be_ldq_mmu(env, taddr, oi, ra);
635
+ return helper_ldsb_mmu(env, taddr, oi, ra);
636
+ case MO_UW:
637
+ return helper_lduw_mmu(env, taddr, oi, ra);
638
+ case MO_SW:
639
+ return helper_ldsw_mmu(env, taddr, oi, ra);
640
+ case MO_UL:
641
+ return helper_ldul_mmu(env, taddr, oi, ra);
642
+ case MO_SL:
643
+ return helper_ldsl_mmu(env, taddr, oi, ra);
644
+ case MO_UQ:
645
+ return helper_ldq_mmu(env, taddr, oi, ra);
646
default:
647
g_assert_not_reached();
648
}
649
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
650
uintptr_t ra = (uintptr_t)tb_ptr;
651
652
#ifdef CONFIG_SOFTMMU
653
- switch (mop & (MO_BSWAP | MO_SIZE)) {
654
+ switch (mop & MO_SIZE) {
655
case MO_UB:
656
- helper_ret_stb_mmu(env, taddr, val, oi, ra);
657
+ helper_stb_mmu(env, taddr, val, oi, ra);
658
break;
659
- case MO_LEUW:
660
- helper_le_stw_mmu(env, taddr, val, oi, ra);
661
+ case MO_UW:
662
+ helper_stw_mmu(env, taddr, val, oi, ra);
663
break;
664
- case MO_LEUL:
665
- helper_le_stl_mmu(env, taddr, val, oi, ra);
666
+ case MO_UL:
667
+ helper_stl_mmu(env, taddr, val, oi, ra);
668
break;
669
- case MO_LEUQ:
670
- helper_le_stq_mmu(env, taddr, val, oi, ra);
671
- break;
672
- case MO_BEUW:
673
- helper_be_stw_mmu(env, taddr, val, oi, ra);
674
- break;
675
- case MO_BEUL:
676
- helper_be_stl_mmu(env, taddr, val, oi, ra);
677
- break;
678
- case MO_BEUQ:
679
- helper_be_stq_mmu(env, taddr, val, oi, ra);
680
+ case MO_UQ:
681
+ helper_stq_mmu(env, taddr, val, oi, ra);
682
break;
683
default:
684
g_assert_not_reached();
685
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
686
index XXXXXXX..XXXXXXX 100644
687
--- a/tcg/aarch64/tcg-target.c.inc
688
+++ b/tcg/aarch64/tcg-target.c.inc
689
@@ -XXX,XX +XXX,XX @@ typedef struct {
690
} HostAddress;
691
692
#ifdef CONFIG_SOFTMMU
693
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
694
- * MemOpIdx oi, uintptr_t ra)
695
- */
696
-static void * const qemu_ld_helpers[MO_SIZE + 1] = {
697
- [MO_8] = helper_ret_ldub_mmu,
698
-#if HOST_BIG_ENDIAN
699
- [MO_16] = helper_be_lduw_mmu,
700
- [MO_32] = helper_be_ldul_mmu,
701
- [MO_64] = helper_be_ldq_mmu,
702
-#else
703
- [MO_16] = helper_le_lduw_mmu,
704
- [MO_32] = helper_le_ldul_mmu,
705
- [MO_64] = helper_le_ldq_mmu,
706
-#endif
707
-};
708
-
709
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
710
- * uintxx_t val, MemOpIdx oi,
711
- * uintptr_t ra)
712
- */
713
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
714
- [MO_8] = helper_ret_stb_mmu,
715
-#if HOST_BIG_ENDIAN
716
- [MO_16] = helper_be_stw_mmu,
717
- [MO_32] = helper_be_stl_mmu,
718
- [MO_64] = helper_be_stq_mmu,
719
-#else
720
- [MO_16] = helper_le_stw_mmu,
721
- [MO_32] = helper_le_stl_mmu,
722
- [MO_64] = helper_le_stq_mmu,
723
-#endif
724
-};
725
-
726
static const TCGLdstHelperParam ldst_helper_param = {
727
.ntmp = 1, .tmp = { TCG_REG_TMP }
728
};
729
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
730
index XXXXXXX..XXXXXXX 100644
731
--- a/tcg/arm/tcg-target.c.inc
732
+++ b/tcg/arm/tcg-target.c.inc
733
@@ -XXX,XX +XXX,XX @@ typedef struct {
734
} HostAddress;
735
736
#ifdef CONFIG_SOFTMMU
737
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
738
- * int mmu_idx, uintptr_t ra)
739
- */
740
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
741
- [MO_UB] = helper_ret_ldub_mmu,
742
- [MO_SB] = helper_ret_ldsb_mmu,
743
-#if HOST_BIG_ENDIAN
744
- [MO_UW] = helper_be_lduw_mmu,
745
- [MO_UL] = helper_be_ldul_mmu,
746
- [MO_UQ] = helper_be_ldq_mmu,
747
- [MO_SW] = helper_be_ldsw_mmu,
748
- [MO_SL] = helper_be_ldul_mmu,
749
-#else
750
- [MO_UW] = helper_le_lduw_mmu,
751
- [MO_UL] = helper_le_ldul_mmu,
752
- [MO_UQ] = helper_le_ldq_mmu,
753
- [MO_SW] = helper_le_ldsw_mmu,
754
- [MO_SL] = helper_le_ldul_mmu,
755
-#endif
756
-};
757
-
758
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
759
- * uintxx_t val, int mmu_idx, uintptr_t ra)
760
- */
761
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
762
- [MO_8] = helper_ret_stb_mmu,
763
-#if HOST_BIG_ENDIAN
764
- [MO_16] = helper_be_stw_mmu,
765
- [MO_32] = helper_be_stl_mmu,
766
- [MO_64] = helper_be_stq_mmu,
767
-#else
768
- [MO_16] = helper_le_stw_mmu,
769
- [MO_32] = helper_le_stl_mmu,
770
- [MO_64] = helper_le_stq_mmu,
771
-#endif
772
-};
773
-
774
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
775
{
776
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
777
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
778
index XXXXXXX..XXXXXXX 100644
779
--- a/tcg/i386/tcg-target.c.inc
780
+++ b/tcg/i386/tcg-target.c.inc
781
@@ -XXX,XX +XXX,XX @@ typedef struct {
782
} HostAddress;
783
784
#if defined(CONFIG_SOFTMMU)
785
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
786
- * int mmu_idx, uintptr_t ra)
787
- */
788
-static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
789
- [MO_UB] = helper_ret_ldub_mmu,
790
- [MO_LEUW] = helper_le_lduw_mmu,
791
- [MO_LEUL] = helper_le_ldul_mmu,
792
- [MO_LEUQ] = helper_le_ldq_mmu,
793
- [MO_BEUW] = helper_be_lduw_mmu,
794
- [MO_BEUL] = helper_be_ldul_mmu,
795
- [MO_BEUQ] = helper_be_ldq_mmu,
796
-};
797
-
798
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
799
- * uintxx_t val, int mmu_idx, uintptr_t ra)
800
- */
801
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
802
- [MO_UB] = helper_ret_stb_mmu,
803
- [MO_LEUW] = helper_le_stw_mmu,
804
- [MO_LEUL] = helper_le_stl_mmu,
805
- [MO_LEUQ] = helper_le_stq_mmu,
806
- [MO_BEUW] = helper_be_stw_mmu,
807
- [MO_BEUL] = helper_be_stl_mmu,
808
- [MO_BEUQ] = helper_be_stq_mmu,
809
-};
810
-
811
/*
812
* Because i686 has no register parameters and because x86_64 has xchg
813
* to handle addr/data register overlap, we have placed all input arguments
814
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
815
}
816
817
tcg_out_ld_helper_args(s, l, &ldst_helper_param);
818
- tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
819
+ tcg_out_branch(s, 1, qemu_ld_helpers[opc & MO_SIZE]);
820
tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
821
822
tcg_out_jmp(s, l->raddr);
823
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
824
}
825
826
tcg_out_st_helper_args(s, l, &ldst_helper_param);
827
- tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
828
+ tcg_out_branch(s, 1, qemu_st_helpers[opc & MO_SIZE]);
829
830
tcg_out_jmp(s, l->raddr);
831
return true;
832
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
833
index XXXXXXX..XXXXXXX 100644
834
--- a/tcg/loongarch64/tcg-target.c.inc
835
+++ b/tcg/loongarch64/tcg-target.c.inc
836
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
837
*/
838
839
#if defined(CONFIG_SOFTMMU)
840
-/*
841
- * helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
842
- * MemOpIdx oi, uintptr_t ra)
843
- */
844
-static void * const qemu_ld_helpers[4] = {
845
- [MO_8] = helper_ret_ldub_mmu,
846
- [MO_16] = helper_le_lduw_mmu,
847
- [MO_32] = helper_le_ldul_mmu,
848
- [MO_64] = helper_le_ldq_mmu,
849
-};
850
-
851
-/*
852
- * helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
853
- * uintxx_t val, MemOpIdx oi,
854
- * uintptr_t ra)
855
- */
856
-static void * const qemu_st_helpers[4] = {
857
- [MO_8] = helper_ret_stb_mmu,
858
- [MO_16] = helper_le_stw_mmu,
859
- [MO_32] = helper_le_stl_mmu,
860
- [MO_64] = helper_le_stq_mmu,
861
-};
862
-
863
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
864
{
865
tcg_out_opc_b(s, 0);
866
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
867
index XXXXXXX..XXXXXXX 100644
868
--- a/tcg/mips/tcg-target.c.inc
869
+++ b/tcg/mips/tcg-target.c.inc
870
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
871
}
872
873
#if defined(CONFIG_SOFTMMU)
874
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
875
- [MO_UB] = helper_ret_ldub_mmu,
876
- [MO_SB] = helper_ret_ldsb_mmu,
877
-#if HOST_BIG_ENDIAN
878
- [MO_UW] = helper_be_lduw_mmu,
879
- [MO_SW] = helper_be_ldsw_mmu,
880
- [MO_UL] = helper_be_ldul_mmu,
881
- [MO_SL] = helper_be_ldsl_mmu,
882
- [MO_UQ] = helper_be_ldq_mmu,
883
-#else
884
- [MO_UW] = helper_le_lduw_mmu,
885
- [MO_SW] = helper_le_ldsw_mmu,
886
- [MO_UL] = helper_le_ldul_mmu,
887
- [MO_UQ] = helper_le_ldq_mmu,
888
- [MO_SL] = helper_le_ldsl_mmu,
889
-#endif
890
-};
891
-
892
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
893
- [MO_UB] = helper_ret_stb_mmu,
894
-#if HOST_BIG_ENDIAN
895
- [MO_UW] = helper_be_stw_mmu,
896
- [MO_UL] = helper_be_stl_mmu,
897
- [MO_UQ] = helper_be_stq_mmu,
898
-#else
899
- [MO_UW] = helper_le_stw_mmu,
900
- [MO_UL] = helper_le_stl_mmu,
901
- [MO_UQ] = helper_le_stq_mmu,
902
-#endif
903
-};
904
-
905
/* We have four temps, we might as well expose three of them. */
906
static const TCGLdstHelperParam ldst_helper_param = {
907
.ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
908
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
909
index XXXXXXX..XXXXXXX 100644
910
--- a/tcg/ppc/tcg-target.c.inc
911
+++ b/tcg/ppc/tcg-target.c.inc
912
@@ -XXX,XX +XXX,XX @@ static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
913
};
914
915
#if defined (CONFIG_SOFTMMU)
916
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
917
- * int mmu_idx, uintptr_t ra)
918
- */
919
-static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
920
- [MO_UB] = helper_ret_ldub_mmu,
921
- [MO_LEUW] = helper_le_lduw_mmu,
922
- [MO_LEUL] = helper_le_ldul_mmu,
923
- [MO_LEUQ] = helper_le_ldq_mmu,
924
- [MO_BEUW] = helper_be_lduw_mmu,
925
- [MO_BEUL] = helper_be_ldul_mmu,
926
- [MO_BEUQ] = helper_be_ldq_mmu,
927
-};
928
-
929
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
930
- * uintxx_t val, int mmu_idx, uintptr_t ra)
931
- */
932
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
933
- [MO_UB] = helper_ret_stb_mmu,
934
- [MO_LEUW] = helper_le_stw_mmu,
935
- [MO_LEUL] = helper_le_stl_mmu,
936
- [MO_LEUQ] = helper_le_stq_mmu,
937
- [MO_BEUW] = helper_be_stw_mmu,
938
- [MO_BEUL] = helper_be_stl_mmu,
939
- [MO_BEUQ] = helper_be_stq_mmu,
940
-};
941
-
942
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
943
{
944
if (arg < 0) {
945
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
946
}
947
948
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
949
- tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
950
+ tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
951
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
952
953
tcg_out_b(s, 0, lb->raddr);
954
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
955
}
956
957
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
958
- tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
959
+ tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
960
961
tcg_out_b(s, 0, lb->raddr);
962
return true;
963
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
964
index XXXXXXX..XXXXXXX 100644
965
--- a/tcg/riscv/tcg-target.c.inc
966
+++ b/tcg/riscv/tcg-target.c.inc
967
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
968
*/
969
970
#if defined(CONFIG_SOFTMMU)
971
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
972
- * MemOpIdx oi, uintptr_t ra)
973
- */
974
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
975
- [MO_UB] = helper_ret_ldub_mmu,
976
- [MO_SB] = helper_ret_ldsb_mmu,
977
-#if HOST_BIG_ENDIAN
978
- [MO_UW] = helper_be_lduw_mmu,
979
- [MO_SW] = helper_be_ldsw_mmu,
980
- [MO_UL] = helper_be_ldul_mmu,
981
-#if TCG_TARGET_REG_BITS == 64
982
- [MO_SL] = helper_be_ldsl_mmu,
983
-#endif
984
- [MO_UQ] = helper_be_ldq_mmu,
985
-#else
986
- [MO_UW] = helper_le_lduw_mmu,
987
- [MO_SW] = helper_le_ldsw_mmu,
988
- [MO_UL] = helper_le_ldul_mmu,
989
-#if TCG_TARGET_REG_BITS == 64
990
- [MO_SL] = helper_le_ldsl_mmu,
991
-#endif
992
- [MO_UQ] = helper_le_ldq_mmu,
993
-#endif
994
-};
995
-
996
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
997
- * uintxx_t val, MemOpIdx oi,
998
- * uintptr_t ra)
999
- */
1000
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
1001
- [MO_8] = helper_ret_stb_mmu,
1002
-#if HOST_BIG_ENDIAN
1003
- [MO_16] = helper_be_stw_mmu,
1004
- [MO_32] = helper_be_stl_mmu,
1005
- [MO_64] = helper_be_stq_mmu,
1006
-#else
1007
- [MO_16] = helper_le_stw_mmu,
1008
- [MO_32] = helper_le_stl_mmu,
1009
- [MO_64] = helper_le_stq_mmu,
1010
-#endif
1011
-};
1012
-
1013
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1014
{
1015
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
1016
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
1017
index XXXXXXX..XXXXXXX 100644
1018
--- a/tcg/s390x/tcg-target.c.inc
1019
+++ b/tcg/s390x/tcg-target.c.inc
1020
@@ -XXX,XX +XXX,XX @@ static const uint8_t tcg_cond_to_ltr_cond[] = {
1021
[TCG_COND_GEU] = S390_CC_ALWAYS,
1022
};
1023
1024
-#ifdef CONFIG_SOFTMMU
1025
-static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
1026
- [MO_UB] = helper_ret_ldub_mmu,
1027
- [MO_SB] = helper_ret_ldsb_mmu,
1028
- [MO_LEUW] = helper_le_lduw_mmu,
1029
- [MO_LESW] = helper_le_ldsw_mmu,
1030
- [MO_LEUL] = helper_le_ldul_mmu,
1031
- [MO_LESL] = helper_le_ldsl_mmu,
1032
- [MO_LEUQ] = helper_le_ldq_mmu,
1033
- [MO_BEUW] = helper_be_lduw_mmu,
1034
- [MO_BESW] = helper_be_ldsw_mmu,
1035
- [MO_BEUL] = helper_be_ldul_mmu,
1036
- [MO_BESL] = helper_be_ldsl_mmu,
1037
- [MO_BEUQ] = helper_be_ldq_mmu,
1038
-};
1039
-
1040
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
1041
- [MO_UB] = helper_ret_stb_mmu,
1042
- [MO_LEUW] = helper_le_stw_mmu,
1043
- [MO_LEUL] = helper_le_stl_mmu,
1044
- [MO_LEUQ] = helper_le_stq_mmu,
1045
- [MO_BEUW] = helper_be_stw_mmu,
1046
- [MO_BEUL] = helper_be_stl_mmu,
1047
- [MO_BEUQ] = helper_be_stq_mmu,
1048
-};
1049
-#endif
1050
-
1051
static const tcg_insn_unit *tb_ret_addr;
1052
uint64_t s390_facilities[3];
1053
1054
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1055
}
1056
1057
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1058
- tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1059
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1060
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1061
1062
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1063
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1064
}
1065
1066
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1067
- tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1068
+ tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1069
1070
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1071
return true;
1072
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
1073
index XXXXXXX..XXXXXXX 100644
1074
--- a/tcg/sparc64/tcg-target.c.inc
1075
+++ b/tcg/sparc64/tcg-target.c.inc
1076
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
1077
}
1078
1079
#ifdef CONFIG_SOFTMMU
1080
-static const tcg_insn_unit *qemu_ld_trampoline[(MO_SSIZE | MO_BSWAP) + 1];
1081
-static const tcg_insn_unit *qemu_st_trampoline[(MO_SIZE | MO_BSWAP) + 1];
1082
+static const tcg_insn_unit *qemu_ld_trampoline[MO_SSIZE + 1];
1083
+static const tcg_insn_unit *qemu_st_trampoline[MO_SIZE + 1];
1084
1085
static void build_trampolines(TCGContext *s)
1086
{
1087
- static void * const qemu_ld_helpers[] = {
1088
- [MO_UB] = helper_ret_ldub_mmu,
1089
- [MO_SB] = helper_ret_ldsb_mmu,
1090
- [MO_LEUW] = helper_le_lduw_mmu,
1091
- [MO_LESW] = helper_le_ldsw_mmu,
1092
- [MO_LEUL] = helper_le_ldul_mmu,
1093
- [MO_LEUQ] = helper_le_ldq_mmu,
1094
- [MO_BEUW] = helper_be_lduw_mmu,
1095
- [MO_BESW] = helper_be_ldsw_mmu,
1096
- [MO_BEUL] = helper_be_ldul_mmu,
1097
- [MO_BEUQ] = helper_be_ldq_mmu,
1098
- };
1099
- static void * const qemu_st_helpers[] = {
1100
- [MO_UB] = helper_ret_stb_mmu,
1101
- [MO_LEUW] = helper_le_stw_mmu,
1102
- [MO_LEUL] = helper_le_stl_mmu,
1103
- [MO_LEUQ] = helper_le_stq_mmu,
1104
- [MO_BEUW] = helper_be_stw_mmu,
1105
- [MO_BEUL] = helper_be_stl_mmu,
1106
- [MO_BEUQ] = helper_be_stq_mmu,
1107
- };
1108
-
1109
int i;
1110
1111
for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
1112
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
1113
/* We use the helpers to extend SB and SW data, leaving the case
1114
of SL needing explicit extending below. */
1115
if ((memop & MO_SSIZE) == MO_SL) {
1116
- func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1117
+ func = qemu_ld_trampoline[MO_UL];
1118
} else {
1119
- func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
1120
+ func = qemu_ld_trampoline[memop & MO_SSIZE];
1121
}
1122
tcg_debug_assert(func != NULL);
1123
tcg_out_call_nodelay(s, func, false);
1124
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
1125
tcg_out_movext(s, (memop & MO_SIZE) == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
1126
TCG_REG_O2, data_type, memop & MO_SIZE, data);
1127
1128
- func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1129
+ func = qemu_st_trampoline[memop & MO_SIZE];
1130
tcg_debug_assert(func != NULL);
1131
tcg_out_call_nodelay(s, func, false);
1132
/* delay slot */
1133
--
1134
2.34.1
1135
1136
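
The hunks above drop the MO_BSWAP dimension from the backend helper tables, so a slow path now indexes exactly one helper per access size. A rough, self-contained illustration of that indexing, using stand-in constants and stub helpers rather than QEMU's real MemOp definitions or helper signatures:

    /* Sketch only: one load helper per access size; byte order is the helper's job. */
    #include <stdint.h>
    #include <stdio.h>

    enum { MO_8 = 0, MO_16 = 1, MO_32 = 2, MO_64 = 3, MO_SIZE = 3 };

    static uint64_t ld_mmu_8(void)  { return 1; }
    static uint64_t ld_mmu_16(void) { return 2; }
    static uint64_t ld_mmu_32(void) { return 4; }
    static uint64_t ld_mmu_64(void) { return 8; }

    /* One entry per size; no separate little/big-endian variants. */
    static uint64_t (* const ld_helpers[MO_SIZE + 1])(void) = {
        [MO_8]  = ld_mmu_8,
        [MO_16] = ld_mmu_16,
        [MO_32] = ld_mmu_32,
        [MO_64] = ld_mmu_64,
    };

    int main(void)
    {
        unsigned memop = MO_32 | 0x10;   /* size bits plus some unrelated flag */
        printf("helper loads %d bytes\n", (int)ld_helpers[memop & MO_SIZE]());
        return 0;
    }
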
New patch
1
TCG backends may need to defer to a helper to implement
2
the atomicity required by a given operation. Mirror the
3
interface used in system mode.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
include/tcg/tcg-ldst.h | 6 +-
9
accel/tcg/user-exec.c | 393 ++++++++++++++++++++++++++++-------------
10
tcg/tcg.c | 6 +-
11
3 files changed, 278 insertions(+), 127 deletions(-)
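
A rough model of the shape these user-only helpers take, a host-endian core (do_ldN_he_mmu below) plus a byte swap applied in the wrapper when the memop's byte order differs from the host; the types and the MOCK_MO_BSWAP flag are simplified assumptions, not QEMU's definitions:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    #define MOCK_MO_BSWAP 0x8   /* stand-in for the MO_BSWAP bit */

    /* Host-endian core, analogous to do_ld2_he_mmu(): read bytes at a host
     * address that has already been validated for alignment and access. */
    static uint16_t do_ld2_he(const void *haddr)
    {
        uint16_t ret;
        memcpy(&ret, haddr, sizeof(ret));
        return ret;
    }

    /* Wrapper, analogous to helper_lduw_mmu(): swap only when asked to
     * (GCC/Clang __builtin_bswap16). */
    static uint16_t ld2(const void *haddr, unsigned memop)
    {
        uint16_t ret = do_ld2_he(haddr);
        if (memop & MOCK_MO_BSWAP) {
            ret = __builtin_bswap16(ret);
        }
        return ret;
    }

    int main(void)
    {
        uint8_t mem[2] = { 0x12, 0x34 };
        printf("host order: 0x%04x  swapped: 0x%04x\n",
               (unsigned)ld2(mem, 0), (unsigned)ld2(mem, MOCK_MO_BSWAP));
        return 0;
    }

The store helpers mirror this, with the swap applied to the value before the host-endian core runs, as helper_stw_mmu does below.
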
12
13
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-ldst.h
16
+++ b/include/tcg/tcg-ldst.h
17
@@ -XXX,XX +XXX,XX @@
18
#ifndef TCG_LDST_H
19
#define TCG_LDST_H
20
21
-#ifdef CONFIG_SOFTMMU
22
-
23
/* Value zero-extended to tcg register size. */
24
tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
25
MemOpIdx oi, uintptr_t retaddr);
26
@@ -XXX,XX +XXX,XX @@ void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
27
void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
28
MemOpIdx oi, uintptr_t retaddr);
29
30
-#else
31
+#ifdef CONFIG_USER_ONLY
32
33
G_NORETURN void helper_unaligned_ld(CPUArchState *env, target_ulong addr);
34
G_NORETURN void helper_unaligned_st(CPUArchState *env, target_ulong addr);
35
36
-#endif /* CONFIG_SOFTMMU */
37
+#endif /* CONFIG_USER_ONLY */
38
#endif /* TCG_LDST_H */
39
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/accel/tcg/user-exec.c
42
+++ b/accel/tcg/user-exec.c
43
@@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong last) { }
44
45
/* The softmmu versions of these helpers are in cputlb.c. */
46
47
-/*
48
- * Verify that we have passed the correct MemOp to the correct function.
49
- *
50
- * We could present one function to target code, and dispatch based on
51
- * the MemOp, but so far we have worked hard to avoid an indirect function
52
- * call along the memory path.
53
- */
54
-static void validate_memop(MemOpIdx oi, MemOp expected)
55
-{
56
-#ifdef CONFIG_DEBUG_TCG
57
- MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP);
58
- assert(have == expected);
59
-#endif
60
-}
61
-
62
void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
63
{
64
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
65
@@ -XXX,XX +XXX,XX @@ void helper_unaligned_st(CPUArchState *env, target_ulong addr)
66
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
67
}
68
69
-static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
70
- MemOpIdx oi, uintptr_t ra, MMUAccessType type)
71
+static void *cpu_mmu_lookup(CPUArchState *env, abi_ptr addr,
72
+ MemOp mop, uintptr_t ra, MMUAccessType type)
73
{
74
- MemOp mop = get_memop(oi);
75
int a_bits = get_alignment_bits(mop);
76
void *ret;
77
78
@@ -XXX,XX +XXX,XX @@ static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
79
80
#include "ldst_atomicity.c.inc"
81
82
-uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
83
- MemOpIdx oi, uintptr_t ra)
84
+static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
85
+ MemOp mop, uintptr_t ra)
86
{
87
void *haddr;
88
uint8_t ret;
89
90
- validate_memop(oi, MO_UB);
91
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
92
+ tcg_debug_assert((mop & MO_SIZE) == MO_8);
93
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
94
ret = ldub_p(haddr);
95
clear_helper_retaddr();
96
+ return ret;
97
+}
98
+
99
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
100
+ MemOpIdx oi, uintptr_t ra)
101
+{
102
+ return do_ld1_mmu(env, addr, get_memop(oi), ra);
103
+}
104
+
105
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
106
+ MemOpIdx oi, uintptr_t ra)
107
+{
108
+ return (int8_t)do_ld1_mmu(env, addr, get_memop(oi), ra);
109
+}
110
+
111
+uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
112
+ MemOpIdx oi, uintptr_t ra)
113
+{
114
+ uint8_t ret = do_ld1_mmu(env, addr, get_memop(oi), ra);
115
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
116
return ret;
117
}
118
119
+static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
120
+ MemOp mop, uintptr_t ra)
121
+{
122
+ void *haddr;
123
+ uint16_t ret;
124
+
125
+ tcg_debug_assert((mop & MO_SIZE) == MO_16);
126
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
127
+ ret = load_atom_2(env, ra, haddr, mop);
128
+ clear_helper_retaddr();
129
+ return ret;
130
+}
131
+
132
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
133
+ MemOpIdx oi, uintptr_t ra)
134
+{
135
+ MemOp mop = get_memop(oi);
136
+ uint16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
137
+
138
+ if (mop & MO_BSWAP) {
139
+ ret = bswap16(ret);
140
+ }
141
+ return ret;
142
+}
143
+
144
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
145
+ MemOpIdx oi, uintptr_t ra)
146
+{
147
+ MemOp mop = get_memop(oi);
148
+ int16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
149
+
150
+ if (mop & MO_BSWAP) {
151
+ ret = bswap16(ret);
152
+ }
153
+ return ret;
154
+}
155
+
156
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
157
MemOpIdx oi, uintptr_t ra)
158
{
159
- void *haddr;
160
+ MemOp mop = get_memop(oi);
161
uint16_t ret;
162
163
- validate_memop(oi, MO_BEUW);
164
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
165
- ret = load_atom_2(env, ra, haddr, get_memop(oi));
166
- clear_helper_retaddr();
167
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
168
+ ret = do_ld2_he_mmu(env, addr, mop, ra);
169
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
170
return cpu_to_be16(ret);
171
}
172
173
-uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
174
- MemOpIdx oi, uintptr_t ra)
175
-{
176
- void *haddr;
177
- uint32_t ret;
178
-
179
- validate_memop(oi, MO_BEUL);
180
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
181
- ret = load_atom_4(env, ra, haddr, get_memop(oi));
182
- clear_helper_retaddr();
183
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
184
- return cpu_to_be32(ret);
185
-}
186
-
187
-uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
188
- MemOpIdx oi, uintptr_t ra)
189
-{
190
- void *haddr;
191
- uint64_t ret;
192
-
193
- validate_memop(oi, MO_BEUQ);
194
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
195
- ret = load_atom_8(env, ra, haddr, get_memop(oi));
196
- clear_helper_retaddr();
197
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
198
- return cpu_to_be64(ret);
199
-}
200
-
201
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
202
MemOpIdx oi, uintptr_t ra)
203
{
204
- void *haddr;
205
+ MemOp mop = get_memop(oi);
206
uint16_t ret;
207
208
- validate_memop(oi, MO_LEUW);
209
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
210
- ret = load_atom_2(env, ra, haddr, get_memop(oi));
211
- clear_helper_retaddr();
212
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
213
+ ret = do_ld2_he_mmu(env, addr, mop, ra);
214
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
215
return cpu_to_le16(ret);
216
}
217
218
+static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
219
+ MemOp mop, uintptr_t ra)
220
+{
221
+ void *haddr;
222
+ uint32_t ret;
223
+
224
+ tcg_debug_assert((mop & MO_SIZE) == MO_32);
225
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
226
+ ret = load_atom_4(env, ra, haddr, mop);
227
+ clear_helper_retaddr();
228
+ return ret;
229
+}
230
+
231
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
232
+ MemOpIdx oi, uintptr_t ra)
233
+{
234
+ MemOp mop = get_memop(oi);
235
+ uint32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
236
+
237
+ if (mop & MO_BSWAP) {
238
+ ret = bswap32(ret);
239
+ }
240
+ return ret;
241
+}
242
+
243
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
244
+ MemOpIdx oi, uintptr_t ra)
245
+{
246
+ MemOp mop = get_memop(oi);
247
+ int32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
248
+
249
+ if (mop & MO_BSWAP) {
250
+ ret = bswap32(ret);
251
+ }
252
+ return ret;
253
+}
254
+
255
+uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
256
+ MemOpIdx oi, uintptr_t ra)
257
+{
258
+ MemOp mop = get_memop(oi);
259
+ uint32_t ret;
260
+
261
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
262
+ ret = do_ld4_he_mmu(env, addr, mop, ra);
263
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
264
+ return cpu_to_be32(ret);
265
+}
266
+
267
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
268
MemOpIdx oi, uintptr_t ra)
269
{
270
- void *haddr;
271
+ MemOp mop = get_memop(oi);
272
uint32_t ret;
273
274
- validate_memop(oi, MO_LEUL);
275
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
276
- ret = load_atom_4(env, ra, haddr, get_memop(oi));
277
- clear_helper_retaddr();
278
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
279
+ ret = do_ld4_he_mmu(env, addr, mop, ra);
280
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
281
return cpu_to_le32(ret);
282
}
283
284
+static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
285
+ MemOp mop, uintptr_t ra)
286
+{
287
+ void *haddr;
288
+ uint64_t ret;
289
+
290
+ tcg_debug_assert((mop & MO_SIZE) == MO_64);
291
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
292
+ ret = load_atom_8(env, ra, haddr, mop);
293
+ clear_helper_retaddr();
294
+ return ret;
295
+}
296
+
297
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
298
+ MemOpIdx oi, uintptr_t ra)
299
+{
300
+ MemOp mop = get_memop(oi);
301
+ uint64_t ret = do_ld8_he_mmu(env, addr, mop, ra);
302
+
303
+ if (mop & MO_BSWAP) {
304
+ ret = bswap64(ret);
305
+ }
306
+ return ret;
307
+}
308
+
309
+uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
310
+ MemOpIdx oi, uintptr_t ra)
311
+{
312
+ MemOp mop = get_memop(oi);
313
+ uint64_t ret;
314
+
315
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
316
+ ret = do_ld8_he_mmu(env, addr, mop, ra);
317
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
318
+ return cpu_to_be64(ret);
319
+}
320
+
321
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
322
MemOpIdx oi, uintptr_t ra)
323
{
324
- void *haddr;
325
+ MemOp mop = get_memop(oi);
326
uint64_t ret;
327
328
- validate_memop(oi, MO_LEUQ);
329
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
330
- ret = load_atom_8(env, ra, haddr, get_memop(oi));
331
- clear_helper_retaddr();
332
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
333
+ ret = do_ld8_he_mmu(env, addr, mop, ra);
334
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
335
return cpu_to_le64(ret);
336
}
337
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
338
void *haddr;
339
Int128 ret;
340
341
- validate_memop(oi, MO_128 | MO_BE);
342
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
343
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
344
memcpy(&ret, haddr, 16);
345
clear_helper_retaddr();
346
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
347
void *haddr;
348
Int128 ret;
349
350
- validate_memop(oi, MO_128 | MO_LE);
351
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
352
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
353
memcpy(&ret, haddr, 16);
354
clear_helper_retaddr();
355
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
356
return ret;
357
}
358
359
-void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
360
- MemOpIdx oi, uintptr_t ra)
361
+static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
362
+ MemOp mop, uintptr_t ra)
363
{
364
void *haddr;
365
366
- validate_memop(oi, MO_UB);
367
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
368
+ tcg_debug_assert((mop & MO_SIZE) == MO_8);
369
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
370
stb_p(haddr, val);
371
clear_helper_retaddr();
372
+}
373
+
374
+void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
375
+ MemOpIdx oi, uintptr_t ra)
376
+{
377
+ do_st1_mmu(env, addr, val, get_memop(oi), ra);
378
+}
379
+
380
+void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
381
+ MemOpIdx oi, uintptr_t ra)
382
+{
383
+ do_st1_mmu(env, addr, val, get_memop(oi), ra);
384
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
385
}
386
387
+static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
388
+ MemOp mop, uintptr_t ra)
389
+{
390
+ void *haddr;
391
+
392
+ tcg_debug_assert((mop & MO_SIZE) == MO_16);
393
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
394
+ store_atom_2(env, ra, haddr, mop, val);
395
+ clear_helper_retaddr();
396
+}
397
+
398
+void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
399
+ MemOpIdx oi, uintptr_t ra)
400
+{
401
+ MemOp mop = get_memop(oi);
402
+
403
+ if (mop & MO_BSWAP) {
404
+ val = bswap16(val);
405
+ }
406
+ do_st2_he_mmu(env, addr, val, mop, ra);
407
+}
408
+
409
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
410
MemOpIdx oi, uintptr_t ra)
411
{
412
- void *haddr;
413
+ MemOp mop = get_memop(oi);
414
415
- validate_memop(oi, MO_BEUW);
416
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
417
- store_atom_2(env, ra, haddr, get_memop(oi), be16_to_cpu(val));
418
- clear_helper_retaddr();
419
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
420
-}
421
-
422
-void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
423
- MemOpIdx oi, uintptr_t ra)
424
-{
425
- void *haddr;
426
-
427
- validate_memop(oi, MO_BEUL);
428
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
429
- store_atom_4(env, ra, haddr, get_memop(oi), be32_to_cpu(val));
430
- clear_helper_retaddr();
431
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
432
-}
433
-
434
-void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
435
- MemOpIdx oi, uintptr_t ra)
436
-{
437
- void *haddr;
438
-
439
- validate_memop(oi, MO_BEUQ);
440
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
441
- store_atom_8(env, ra, haddr, get_memop(oi), be64_to_cpu(val));
442
- clear_helper_retaddr();
443
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
444
+ do_st2_he_mmu(env, addr, be16_to_cpu(val), mop, ra);
445
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
446
}
447
448
void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
449
MemOpIdx oi, uintptr_t ra)
450
+{
451
+ MemOp mop = get_memop(oi);
452
+
453
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
454
+ do_st2_he_mmu(env, addr, le16_to_cpu(val), mop, ra);
455
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
456
+}
457
+
458
+static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
459
+ MemOp mop, uintptr_t ra)
460
{
461
void *haddr;
462
463
- validate_memop(oi, MO_LEUW);
464
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
465
- store_atom_2(env, ra, haddr, get_memop(oi), le16_to_cpu(val));
466
+ tcg_debug_assert((mop & MO_SIZE) == MO_32);
467
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
468
+ store_atom_4(env, ra, haddr, mop, val);
469
clear_helper_retaddr();
470
+}
471
+
472
+void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
473
+ MemOpIdx oi, uintptr_t ra)
474
+{
475
+ MemOp mop = get_memop(oi);
476
+
477
+ if (mop & MO_BSWAP) {
478
+ val = bswap32(val);
479
+ }
480
+ do_st4_he_mmu(env, addr, val, mop, ra);
481
+}
482
+
483
+void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
484
+ MemOpIdx oi, uintptr_t ra)
485
+{
486
+ MemOp mop = get_memop(oi);
487
+
488
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
489
+ do_st4_he_mmu(env, addr, be32_to_cpu(val), mop, ra);
490
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
491
}
492
493
void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
494
MemOpIdx oi, uintptr_t ra)
495
+{
496
+ MemOp mop = get_memop(oi);
497
+
498
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
499
+ do_st4_he_mmu(env, addr, le32_to_cpu(val), mop, ra);
500
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
501
+}
502
+
503
+static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
504
+ MemOp mop, uintptr_t ra)
505
{
506
void *haddr;
507
508
- validate_memop(oi, MO_LEUL);
509
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
510
- store_atom_4(env, ra, haddr, get_memop(oi), le32_to_cpu(val));
511
+ tcg_debug_assert((mop & MO_SIZE) == MO_64);
512
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
513
+ store_atom_8(env, ra, haddr, mop, val);
514
clear_helper_retaddr();
515
+}
516
+
517
+void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
518
+ MemOpIdx oi, uintptr_t ra)
519
+{
520
+ MemOp mop = get_memop(oi);
521
+
522
+ if (mop & MO_BSWAP) {
523
+ val = bswap64(val);
524
+ }
525
+ do_st8_he_mmu(env, addr, val, mop, ra);
526
+}
527
+
528
+void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
529
+ MemOpIdx oi, uintptr_t ra)
530
+{
531
+ MemOp mop = get_memop(oi);
532
+
533
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
534
+ do_st8_he_mmu(env, addr, cpu_to_be64(val), mop, ra);
535
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
536
}
537
538
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
539
MemOpIdx oi, uintptr_t ra)
540
{
541
- void *haddr;
542
+ MemOp mop = get_memop(oi);
543
544
- validate_memop(oi, MO_LEUQ);
545
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
546
- store_atom_8(env, ra, haddr, get_memop(oi), le64_to_cpu(val));
547
- clear_helper_retaddr();
548
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
549
+ do_st8_he_mmu(env, addr, cpu_to_le64(val), mop, ra);
550
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
551
}
552
553
@@ -XXX,XX +XXX,XX @@ void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
554
{
555
void *haddr;
556
557
- validate_memop(oi, MO_128 | MO_BE);
558
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
559
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
560
if (!HOST_BIG_ENDIAN) {
561
val = bswap128(val);
562
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
563
{
564
void *haddr;
565
566
- validate_memop(oi, MO_128 | MO_LE);
567
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
568
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
569
if (HOST_BIG_ENDIAN) {
570
val = bswap128(val);
571
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
572
void *haddr;
573
uint64_t ret;
574
575
- validate_memop(oi, MO_BEUQ);
576
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
577
ret = ldq_p(haddr);
578
clear_helper_retaddr();
579
diff --git a/tcg/tcg.c b/tcg/tcg.c
580
index XXXXXXX..XXXXXXX 100644
581
--- a/tcg/tcg.c
582
+++ b/tcg/tcg.c
583
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
584
const TCGLdstHelperParam *p)
585
__attribute__((unused));
586
587
-#ifdef CONFIG_SOFTMMU
588
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
589
+static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
590
[MO_UB] = helper_ldub_mmu,
591
[MO_SB] = helper_ldsb_mmu,
592
[MO_UW] = helper_lduw_mmu,
593
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
594
#endif
595
};
596
597
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
598
+static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
599
[MO_8] = helper_stb_mmu,
600
[MO_16] = helper_stw_mmu,
601
[MO_32] = helper_stl_mmu,
602
[MO_64] = helper_stq_mmu,
603
};
604
-#endif
605
606
TCGContext tcg_init_ctx;
607
__thread TCGContext *tcg_ctx;
608
--
609
2.34.1
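
One detail of the helpers above worth noting: the sign-extending variants widen through a signed narrow type (the (int8_t) cast in helper_ldsb_mmu, the int16_t local in helper_ldsw_mmu), so the implicit conversion to the register-sized return type performs the sign extension. A tiny standalone illustration of that C idiom, not QEMU code:

    #include <stdint.h>
    #include <stdio.h>

    /* Zero-extends: the narrow value stays unsigned, high bits remain clear. */
    static uint64_t ld1_u(uint8_t byte)
    {
        return byte;
    }

    /* Sign-extends: going through int8_t replicates the sign bit when the
     * value widens to the 64-bit return type. */
    static uint64_t ld1_s(uint8_t byte)
    {
        return (int8_t)byte;
    }

    int main(void)
    {
        printf("0x%016llx\n", (unsigned long long)ld1_u(0x80)); /* ...0080 */
        printf("0x%016llx\n", (unsigned long long)ld1_s(0x80)); /* ffff...ff80 */
        return 0;
    }
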
New patch
1
We can now fold these two pieces of code.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/tci.c | 89 -------------------------------------------------------
7
1 file changed, 89 deletions(-)
8
9
diff --git a/tcg/tci.c b/tcg/tci.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/tci.c
12
+++ b/tcg/tci.c
13
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
14
MemOp mop = get_memop(oi);
15
uintptr_t ra = (uintptr_t)tb_ptr;
16
17
-#ifdef CONFIG_SOFTMMU
18
switch (mop & MO_SSIZE) {
19
case MO_UB:
20
return helper_ldub_mmu(env, taddr, oi, ra);
21
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
22
default:
23
g_assert_not_reached();
24
}
25
-#else
26
- void *haddr = g2h(env_cpu(env), taddr);
27
- unsigned a_mask = (1u << get_alignment_bits(mop)) - 1;
28
- uint64_t ret;
29
-
30
- set_helper_retaddr(ra);
31
- if (taddr & a_mask) {
32
- helper_unaligned_ld(env, taddr);
33
- }
34
- switch (mop & (MO_BSWAP | MO_SSIZE)) {
35
- case MO_UB:
36
- ret = ldub_p(haddr);
37
- break;
38
- case MO_SB:
39
- ret = ldsb_p(haddr);
40
- break;
41
- case MO_LEUW:
42
- ret = lduw_le_p(haddr);
43
- break;
44
- case MO_LESW:
45
- ret = ldsw_le_p(haddr);
46
- break;
47
- case MO_LEUL:
48
- ret = (uint32_t)ldl_le_p(haddr);
49
- break;
50
- case MO_LESL:
51
- ret = (int32_t)ldl_le_p(haddr);
52
- break;
53
- case MO_LEUQ:
54
- ret = ldq_le_p(haddr);
55
- break;
56
- case MO_BEUW:
57
- ret = lduw_be_p(haddr);
58
- break;
59
- case MO_BESW:
60
- ret = ldsw_be_p(haddr);
61
- break;
62
- case MO_BEUL:
63
- ret = (uint32_t)ldl_be_p(haddr);
64
- break;
65
- case MO_BESL:
66
- ret = (int32_t)ldl_be_p(haddr);
67
- break;
68
- case MO_BEUQ:
69
- ret = ldq_be_p(haddr);
70
- break;
71
- default:
72
- g_assert_not_reached();
73
- }
74
- clear_helper_retaddr();
75
- return ret;
76
-#endif
77
}
78
79
static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
80
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
81
MemOp mop = get_memop(oi);
82
uintptr_t ra = (uintptr_t)tb_ptr;
83
84
-#ifdef CONFIG_SOFTMMU
85
switch (mop & MO_SIZE) {
86
case MO_UB:
87
helper_stb_mmu(env, taddr, val, oi, ra);
88
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
89
default:
90
g_assert_not_reached();
91
}
92
-#else
93
- void *haddr = g2h(env_cpu(env), taddr);
94
- unsigned a_mask = (1u << get_alignment_bits(mop)) - 1;
95
-
96
- set_helper_retaddr(ra);
97
- if (taddr & a_mask) {
98
- helper_unaligned_st(env, taddr);
99
- }
100
- switch (mop & (MO_BSWAP | MO_SIZE)) {
101
- case MO_UB:
102
- stb_p(haddr, val);
103
- break;
104
- case MO_LEUW:
105
- stw_le_p(haddr, val);
106
- break;
107
- case MO_LEUL:
108
- stl_le_p(haddr, val);
109
- break;
110
- case MO_LEUQ:
111
- stq_le_p(haddr, val);
112
- break;
113
- case MO_BEUW:
114
- stw_be_p(haddr, val);
115
- break;
116
- case MO_BEUL:
117
- stl_be_p(haddr, val);
118
- break;
119
- case MO_BEUQ:
120
- stq_be_p(haddr, val);
121
- break;
122
- default:
123
- g_assert_not_reached();
124
- }
125
- clear_helper_retaddr();
126
-#endif
127
}
128
129
#if TCG_TARGET_REG_BITS == 64
130
--
131
2.34.1
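
After the fold, tci_qemu_ld and tci_qemu_st keep a single dispatch body that forwards every access to the shared helper_{ld,st}*_mmu entry points, whether or not the build uses softmmu. A self-contained sketch of that shape, with stub helpers and stand-in constants rather than the real tci code:

    #include <stdio.h>

    enum { MO_8 = 0, MO_16 = 1, MO_32 = 2, MO_64 = 3, MO_SIZE = 3 };

    static void stb_mmu(void) { puts("1-byte store helper"); }
    static void stw_mmu(void) { puts("2-byte store helper"); }
    static void stl_mmu(void) { puts("4-byte store helper"); }
    static void stq_mmu(void) { puts("8-byte store helper"); }

    /* One dispatch body for every build; no #ifdef CONFIG_SOFTMMU split. */
    static void qemu_st(unsigned mop)
    {
        switch (mop & MO_SIZE) {
        case MO_8:  stb_mmu(); break;
        case MO_16: stw_mmu(); break;
        case MO_32: stl_mmu(); break;
        case MO_64: stq_mmu(); break;
        }
    }

    int main(void)
    {
        qemu_st(MO_16);
        return 0;
    }
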
1
Simplify the implementation of get_page_addr_code_hostp
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
by reusing the existing probe_access infrastructure.
3
4
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
5
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
accel/tcg/cputlb.c | 76 ++++++++++++++++------------------------------
4
accel/tcg/tcg-runtime.h | 3 +
9
1 file changed, 26 insertions(+), 50 deletions(-)
5
include/tcg/tcg-ldst.h | 4 +
6
accel/tcg/cputlb.c | 399 +++++++++++++++++++++++++--------
7
accel/tcg/user-exec.c | 94 ++++++--
8
tcg/tcg-op.c | 173 +++++++++-----
9
accel/tcg/ldst_atomicity.c.inc | 184 +++++++++++++++
10
6 files changed, 679 insertions(+), 178 deletions(-)
10
11
12
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/tcg-runtime.h
15
+++ b/accel/tcg/tcg-runtime.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
17
DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
18
#endif /* IN_HELPER_PROTO */
19
20
+DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, tl, i32)
21
+DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, tl, i128, i32)
22
+
23
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
24
i32, env, tl, i32, i32, i32)
25
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
26
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
27
index XXXXXXX..XXXXXXX 100644
28
--- a/include/tcg/tcg-ldst.h
29
+++ b/include/tcg/tcg-ldst.h
30
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
31
MemOpIdx oi, uintptr_t retaddr);
32
uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
33
MemOpIdx oi, uintptr_t retaddr);
34
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
35
+ MemOpIdx oi, uintptr_t retaddr);
36
37
/* Value sign-extended to tcg register size. */
38
tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
39
@@ -XXX,XX +XXX,XX @@ void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
40
MemOpIdx oi, uintptr_t retaddr);
41
void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
42
MemOpIdx oi, uintptr_t retaddr);
43
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
44
+ MemOpIdx oi, uintptr_t retaddr);
45
46
#ifdef CONFIG_USER_ONLY
47
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
48
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
index XXXXXXX..XXXXXXX 100644
49
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
50
--- a/accel/tcg/cputlb.c
14
+++ b/accel/tcg/cputlb.c
51
+++ b/accel/tcg/cputlb.c
15
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
52
@@ -XXX,XX +XXX,XX @@
16
victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
53
#include "qemu/plugin-memory.h"
17
(ADDR) & TARGET_PAGE_MASK)
54
#endif
18
55
#include "tcg/tcg-ldst.h"
19
-/*
56
+#include "exec/helper-proto.h"
20
- * Return a ram_addr_t for the virtual address for execution.
57
21
- *
58
/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
22
- * Return -1 if we can't translate and execute from an entire page
59
/* #define DEBUG_TLB */
23
- * of RAM. This will force us to execute by loading and translating
60
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_whole_be8(CPUArchState *env, uintptr_t ra,
24
- * one insn at a time, without caching.
61
return (ret_be << (p->size * 8)) | x;
25
- *
62
}
26
- * NOTE: This function will trigger an exception if the page is
63
27
- * not executable.
64
+/**
28
- */
65
+ * do_ld_parts_be16
29
-tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
66
+ * @p: translation parameters
30
- void **hostp)
67
+ * @ret_be: accumulated data
31
-{
68
+ *
32
- uintptr_t mmu_idx = cpu_mmu_index(env, true);
69
+ * As do_ld_bytes_beN, but with one atomic load.
33
- uintptr_t index = tlb_index(env, mmu_idx, addr);
70
+ * 16 aligned bytes are guaranteed to cover the load.
34
- CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
71
+ */
35
- void *p;
72
+static Int128 do_ld_whole_be16(CPUArchState *env, uintptr_t ra,
36
-
73
+ MMULookupPageData *p, uint64_t ret_be)
37
- if (unlikely(!tlb_hit(entry->addr_code, addr))) {
74
+{
38
- if (!VICTIM_TLB_HIT(addr_code, addr)) {
75
+ int o = p->addr & 15;
39
- tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
76
+ Int128 x, y = load_atomic16_or_exit(env, ra, p->haddr - o);
40
- index = tlb_index(env, mmu_idx, addr);
77
+ int size = p->size;
41
- entry = tlb_entry(env, mmu_idx, addr);
78
+
42
-
79
+ if (!HOST_BIG_ENDIAN) {
43
- if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
80
+ y = bswap128(y);
44
- /*
81
+ }
45
- * The MMU protection covers a smaller range than a target
82
+ y = int128_lshift(y, o * 8);
46
- * page, so we must redo the MMU check for every insn.
83
+ y = int128_urshift(y, (16 - size) * 8);
47
- */
84
+ x = int128_make64(ret_be);
48
- return -1;
85
+ x = int128_lshift(x, size * 8);
49
- }
86
+ return int128_or(x, y);
50
- }
87
+}
51
- assert(tlb_hit(entry->addr_code, addr));
88
+
89
/*
90
* Wrapper for the above.
91
*/
92
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
93
}
94
}
95
96
+/*
97
+ * Wrapper for the above, for 8 < size < 16.
98
+ */
99
+static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p,
100
+ uint64_t a, int mmu_idx, MemOp mop, uintptr_t ra)
101
+{
102
+ int size = p->size;
103
+ uint64_t b;
104
+ MemOp atom;
105
+
106
+ if (unlikely(p->flags & TLB_MMIO)) {
107
+ p->size = size - 8;
108
+ a = do_ld_mmio_beN(env, p, a, mmu_idx, MMU_DATA_LOAD, ra);
109
+ p->addr += p->size;
110
+ p->size = 8;
111
+ b = do_ld_mmio_beN(env, p, 0, mmu_idx, MMU_DATA_LOAD, ra);
112
+ return int128_make128(b, a);
113
+ }
114
+
115
+ /*
116
+ * It is a given that we cross a page and therefore there is no
117
+ * atomicity for the load as a whole, but subobjects may need attention.
118
+ */
119
+ atom = mop & MO_ATOM_MASK;
120
+ switch (atom) {
121
+ case MO_ATOM_SUBALIGN:
122
+ p->size = size - 8;
123
+ a = do_ld_parts_beN(p, a);
124
+ p->haddr += size - 8;
125
+ p->size = 8;
126
+ b = do_ld_parts_beN(p, 0);
127
+ break;
128
+
129
+ case MO_ATOM_WITHIN16_PAIR:
130
+ /* Since size > 8, this is the half that must be atomic. */
131
+ return do_ld_whole_be16(env, ra, p, a);
132
+
133
+ case MO_ATOM_IFALIGN_PAIR:
134
+ /*
135
+ * Since size > 8, both halves are misaligned,
136
+ * and so neither is atomic.
137
+ */
138
+ case MO_ATOM_IFALIGN:
139
+ case MO_ATOM_WITHIN16:
140
+ case MO_ATOM_NONE:
141
+ p->size = size - 8;
142
+ a = do_ld_bytes_beN(p, a);
143
+ b = ldq_be_p(p->haddr + size - 8);
144
+ break;
145
+
146
+ default:
147
+ g_assert_not_reached();
148
+ }
149
+
150
+ return int128_make128(b, a);
151
+}
152
+
153
static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
154
MMUAccessType type, uintptr_t ra)
155
{
156
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
157
return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
158
}
159
160
+static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr,
161
+ MemOpIdx oi, uintptr_t ra)
162
+{
163
+ MMULookupLocals l;
164
+ bool crosspage;
165
+ uint64_t a, b;
166
+ Int128 ret;
167
+ int first;
168
+
169
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
170
+ if (likely(!crosspage)) {
171
+ /* Perform the load host endian. */
172
+ if (unlikely(l.page[0].flags & TLB_MMIO)) {
173
+ QEMU_IOTHREAD_LOCK_GUARD();
174
+ a = io_readx(env, l.page[0].full, l.mmu_idx, addr,
175
+ ra, MMU_DATA_LOAD, MO_64);
176
+ b = io_readx(env, l.page[0].full, l.mmu_idx, addr + 8,
177
+ ra, MMU_DATA_LOAD, MO_64);
178
+ ret = int128_make128(HOST_BIG_ENDIAN ? b : a,
179
+ HOST_BIG_ENDIAN ? a : b);
180
+ } else {
181
+ ret = load_atom_16(env, ra, l.page[0].haddr, l.memop);
182
+ }
183
+ if (l.memop & MO_BSWAP) {
184
+ ret = bswap128(ret);
185
+ }
186
+ return ret;
187
+ }
188
+
189
+ first = l.page[0].size;
190
+ if (first == 8) {
191
+ MemOp mop8 = (l.memop & ~MO_SIZE) | MO_64;
192
+
193
+ a = do_ld_8(env, &l.page[0], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
194
+ b = do_ld_8(env, &l.page[1], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
195
+ if ((mop8 & MO_BSWAP) == MO_LE) {
196
+ ret = int128_make128(a, b);
197
+ } else {
198
+ ret = int128_make128(b, a);
199
+ }
200
+ return ret;
201
+ }
202
+
203
+ if (first < 8) {
204
+ a = do_ld_beN(env, &l.page[0], 0, l.mmu_idx,
205
+ MMU_DATA_LOAD, l.memop, ra);
206
+ ret = do_ld16_beN(env, &l.page[1], a, l.mmu_idx, l.memop, ra);
207
+ } else {
208
+ ret = do_ld16_beN(env, &l.page[0], 0, l.mmu_idx, l.memop, ra);
209
+ b = int128_getlo(ret);
210
+ ret = int128_lshift(ret, l.page[1].size * 8);
211
+ a = int128_gethi(ret);
212
+ b = do_ld_beN(env, &l.page[1], b, l.mmu_idx,
213
+ MMU_DATA_LOAD, l.memop, ra);
214
+ ret = int128_make128(b, a);
215
+ }
216
+ if ((l.memop & MO_BSWAP) == MO_LE) {
217
+ ret = bswap128(ret);
218
+ }
219
+ return ret;
220
+}
221
+
222
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
223
+ uint32_t oi, uintptr_t retaddr)
224
+{
225
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
226
+ return do_ld16_mmu(env, addr, oi, retaddr);
227
+}
228
+
229
+Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, uint32_t oi)
230
+{
231
+ return helper_ld16_mmu(env, addr, oi, GETPC());
232
+}
233
+
234
/*
235
* Load helpers for cpu_ldst.h.
236
*/
237
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
238
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
239
MemOpIdx oi, uintptr_t ra)
240
{
241
- MemOp mop = get_memop(oi);
242
- int mmu_idx = get_mmuidx(oi);
243
- MemOpIdx new_oi;
244
- unsigned a_bits;
245
- uint64_t h, l;
246
+ Int128 ret;
247
248
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
249
- a_bits = get_alignment_bits(mop);
250
-
251
- /* Handle CPU specific unaligned behaviour */
252
- if (addr & ((1 << a_bits) - 1)) {
253
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
254
- mmu_idx, ra);
52
- }
255
- }
53
-
256
-
54
- if (unlikely(entry->addr_code & TLB_MMIO)) {
257
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
55
- /* The region is not backed by RAM. */
258
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
56
- if (hostp) {
259
- new_oi = make_memop_idx(mop, mmu_idx);
57
- *hostp = NULL;
260
-
58
- }
261
- h = helper_ldq_mmu(env, addr, new_oi, ra);
59
- return -1;
262
- l = helper_ldq_mmu(env, addr + 8, new_oi, ra);
263
-
264
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
265
- return int128_make128(l, h);
266
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
267
+ ret = do_ld16_mmu(env, addr, oi, ra);
268
+ plugin_load_cb(env, addr, oi);
269
+ return ret;
270
}
271
272
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
273
MemOpIdx oi, uintptr_t ra)
274
{
275
- MemOp mop = get_memop(oi);
276
- int mmu_idx = get_mmuidx(oi);
277
- MemOpIdx new_oi;
278
- unsigned a_bits;
279
- uint64_t h, l;
280
+ Int128 ret;
281
282
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
283
- a_bits = get_alignment_bits(mop);
284
-
285
- /* Handle CPU specific unaligned behaviour */
286
- if (addr & ((1 << a_bits) - 1)) {
287
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
288
- mmu_idx, ra);
60
- }
289
- }
61
-
290
-
62
- p = (void *)((uintptr_t)addr + entry->addend);
291
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
63
- if (hostp) {
292
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
64
- *hostp = p;
293
- new_oi = make_memop_idx(mop, mmu_idx);
294
-
295
- l = helper_ldq_mmu(env, addr, new_oi, ra);
296
- h = helper_ldq_mmu(env, addr + 8, new_oi, ra);
297
-
298
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
299
- return int128_make128(l, h);
300
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
301
+ ret = do_ld16_mmu(env, addr, oi, ra);
302
+ plugin_load_cb(env, addr, oi);
303
+ return ret;
304
}
305
306
/*
307
@@ -XXX,XX +XXX,XX @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
308
}
309
}
310
311
+/*
312
+ * Wrapper for the above, for 8 < size < 16.
313
+ */
314
+static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
315
+ Int128 val_le, int mmu_idx,
316
+ MemOp mop, uintptr_t ra)
317
+{
318
+ int size = p->size;
319
+ MemOp atom;
320
+
321
+ if (unlikely(p->flags & TLB_MMIO)) {
322
+ p->size = 8;
323
+ do_st_mmio_leN(env, p, int128_getlo(val_le), mmu_idx, ra);
324
+ p->size = size - 8;
325
+ p->addr += 8;
326
+ return do_st_mmio_leN(env, p, int128_gethi(val_le), mmu_idx, ra);
327
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
328
+ return int128_gethi(val_le) >> ((size - 8) * 8);
329
+ }
330
+
331
+ /*
332
+ * It is a given that we cross a page and therefore there is no atomicity
333
+ * for the store as a whole, but subobjects may need attention.
334
+ */
335
+ atom = mop & MO_ATOM_MASK;
336
+ switch (atom) {
337
+ case MO_ATOM_SUBALIGN:
338
+ store_parts_leN(p->haddr, 8, int128_getlo(val_le));
339
+ return store_parts_leN(p->haddr + 8, p->size - 8,
340
+ int128_gethi(val_le));
341
+
342
+ case MO_ATOM_WITHIN16_PAIR:
343
+ /* Since size > 8, this is the half that must be atomic. */
344
+ if (!HAVE_al16) {
345
+ cpu_loop_exit_atomic(env_cpu(env), ra);
346
+ }
347
+ return store_whole_le16(p->haddr, p->size, val_le);
348
+
349
+ case MO_ATOM_IFALIGN_PAIR:
350
+ /*
351
+ * Since size > 8, both halves are misaligned,
352
+ * and so neither is atomic.
353
+ */
354
+ case MO_ATOM_IFALIGN:
355
+ case MO_ATOM_NONE:
356
+ stq_le_p(p->haddr, int128_getlo(val_le));
357
+ return store_bytes_leN(p->haddr + 8, p->size - 8,
358
+ int128_gethi(val_le));
359
+
360
+ default:
361
+ g_assert_not_reached();
362
+ }
363
+}
364
+
365
static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
366
int mmu_idx, uintptr_t ra)
367
{
368
@@ -XXX,XX +XXX,XX @@ void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
369
do_st8_mmu(env, addr, val, oi, retaddr);
370
}
371
372
+static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
373
+ MemOpIdx oi, uintptr_t ra)
374
+{
375
+ MMULookupLocals l;
376
+ bool crosspage;
377
+ uint64_t a, b;
378
+ int first;
379
+
380
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
381
+ if (likely(!crosspage)) {
382
+ /* Swap to host endian if necessary, then store. */
383
+ if (l.memop & MO_BSWAP) {
384
+ val = bswap128(val);
385
+ }
386
+ if (unlikely(l.page[0].flags & TLB_MMIO)) {
387
+ QEMU_IOTHREAD_LOCK_GUARD();
388
+ if (HOST_BIG_ENDIAN) {
389
+ b = int128_getlo(val), a = int128_gethi(val);
390
+ } else {
391
+ a = int128_getlo(val), b = int128_gethi(val);
392
+ }
393
+ io_writex(env, l.page[0].full, l.mmu_idx, a, addr, ra, MO_64);
394
+ io_writex(env, l.page[0].full, l.mmu_idx, b, addr + 8, ra, MO_64);
395
+ } else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
396
+ /* nothing */
397
+ } else {
398
+ store_atom_16(env, ra, l.page[0].haddr, l.memop, val);
399
+ }
400
+ return;
401
+ }
402
+
403
+ first = l.page[0].size;
404
+ if (first == 8) {
405
+ MemOp mop8 = (l.memop & ~(MO_SIZE | MO_BSWAP)) | MO_64;
406
+
407
+ if (l.memop & MO_BSWAP) {
408
+ val = bswap128(val);
409
+ }
410
+ if (HOST_BIG_ENDIAN) {
411
+ b = int128_getlo(val), a = int128_gethi(val);
412
+ } else {
413
+ a = int128_getlo(val), b = int128_gethi(val);
414
+ }
415
+ do_st_8(env, &l.page[0], a, l.mmu_idx, mop8, ra);
416
+ do_st_8(env, &l.page[1], b, l.mmu_idx, mop8, ra);
417
+ return;
418
+ }
419
+
420
+ if ((l.memop & MO_BSWAP) != MO_LE) {
421
+ val = bswap128(val);
422
+ }
423
+ if (first < 8) {
424
+ do_st_leN(env, &l.page[0], int128_getlo(val), l.mmu_idx, l.memop, ra);
425
+ val = int128_urshift(val, first * 8);
426
+ do_st16_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
427
+ } else {
428
+ b = do_st16_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
429
+ do_st_leN(env, &l.page[1], b, l.mmu_idx, l.memop, ra);
430
+ }
431
+}
432
+
433
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
434
+ MemOpIdx oi, uintptr_t retaddr)
435
+{
436
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
437
+ do_st16_mmu(env, addr, val, oi, retaddr);
438
+}
439
+
440
+void helper_st_i128(CPUArchState *env, target_ulong addr, Int128 val,
441
+ MemOpIdx oi)
442
+{
443
+ helper_st16_mmu(env, addr, val, oi, GETPC());
444
+}
445
+
446
/*
447
* Store Helpers for cpu_ldst.h
448
*/
449
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
450
plugin_store_cb(env, addr, oi);
451
}
452
453
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
454
- MemOpIdx oi, uintptr_t ra)
455
+void cpu_st16_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
456
+ MemOpIdx oi, uintptr_t retaddr)
457
{
458
- MemOp mop = get_memop(oi);
459
- int mmu_idx = get_mmuidx(oi);
460
- MemOpIdx new_oi;
461
- unsigned a_bits;
462
-
463
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
464
- a_bits = get_alignment_bits(mop);
465
-
466
- /* Handle CPU specific unaligned behaviour */
467
- if (addr & ((1 << a_bits) - 1)) {
468
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
469
- mmu_idx, ra);
65
- }
470
- }
66
- return qemu_ram_addr_from_host_nofail(p);
471
-
67
-}
472
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
68
-
473
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
69
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
474
- new_oi = make_memop_idx(mop, mmu_idx);
70
CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
475
-
71
{
476
- helper_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
72
@@ -XXX,XX +XXX,XX @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
477
- helper_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
73
return flags ? NULL : host;
478
-
74
}
479
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
480
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
481
+ do_st16_mmu(env, addr, val, oi, retaddr);
482
+ plugin_store_cb(env, addr, oi);
483
}
484
485
-void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
486
- MemOpIdx oi, uintptr_t ra)
487
+void cpu_st16_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
488
+ MemOpIdx oi, uintptr_t retaddr)
489
{
490
- MemOp mop = get_memop(oi);
491
- int mmu_idx = get_mmuidx(oi);
492
- MemOpIdx new_oi;
493
- unsigned a_bits;
494
-
495
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
496
- a_bits = get_alignment_bits(mop);
497
-
498
- /* Handle CPU specific unaligned behaviour */
499
- if (addr & ((1 << a_bits) - 1)) {
500
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
501
- mmu_idx, ra);
502
- }
503
-
504
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
505
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
506
- new_oi = make_memop_idx(mop, mmu_idx);
507
-
508
- helper_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
509
- helper_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
510
-
511
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
512
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
513
+ do_st16_mmu(env, addr, val, oi, retaddr);
514
+ plugin_store_cb(env, addr, oi);
515
}
516
517
#include "ldst_common.c.inc"
518
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
519
index XXXXXXX..XXXXXXX 100644
520
--- a/accel/tcg/user-exec.c
521
+++ b/accel/tcg/user-exec.c
522
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
523
return cpu_to_le64(ret);
524
}
525
526
-Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
527
- MemOpIdx oi, uintptr_t ra)
528
+static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
529
+ MemOp mop, uintptr_t ra)
530
{
531
void *haddr;
532
Int128 ret;
533
534
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
535
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
536
- memcpy(&ret, haddr, 16);
537
+ tcg_debug_assert((mop & MO_SIZE) == MO_128);
538
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
539
+ ret = load_atom_16(env, ra, haddr, mop);
540
clear_helper_retaddr();
541
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
542
+ return ret;
543
+}
544
545
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
546
+ MemOpIdx oi, uintptr_t ra)
547
+{
548
+ MemOp mop = get_memop(oi);
549
+ Int128 ret = do_ld16_he_mmu(env, addr, mop, ra);
550
+
551
+ if (mop & MO_BSWAP) {
552
+ ret = bswap128(ret);
553
+ }
554
+ return ret;
555
+}
556
+
557
+Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, MemOpIdx oi)
558
+{
559
+ return helper_ld16_mmu(env, addr, oi, GETPC());
560
+}
561
+
562
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
563
+ MemOpIdx oi, uintptr_t ra)
564
+{
565
+ MemOp mop = get_memop(oi);
566
+ Int128 ret;
567
+
568
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
569
+ ret = do_ld16_he_mmu(env, addr, mop, ra);
570
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
571
if (!HOST_BIG_ENDIAN) {
572
ret = bswap128(ret);
573
}
574
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
575
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
576
MemOpIdx oi, uintptr_t ra)
577
{
578
- void *haddr;
579
+ MemOp mop = get_memop(oi);
580
Int128 ret;
581
582
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
583
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
584
- memcpy(&ret, haddr, 16);
585
- clear_helper_retaddr();
586
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
587
+ ret = do_ld16_he_mmu(env, addr, mop, ra);
588
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
589
-
590
if (HOST_BIG_ENDIAN) {
591
ret = bswap128(ret);
592
}
593
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
594
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
595
}
596
597
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
598
- Int128 val, MemOpIdx oi, uintptr_t ra)
599
+static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
600
+ MemOp mop, uintptr_t ra)
601
{
602
void *haddr;
603
604
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
605
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
606
+ tcg_debug_assert((mop & MO_SIZE) == MO_128);
607
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
608
+ store_atom_16(env, ra, haddr, mop, val);
609
+ clear_helper_retaddr();
610
+}
611
+
612
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
613
+ MemOpIdx oi, uintptr_t ra)
614
+{
615
+ MemOp mop = get_memop(oi);
616
+
617
+ if (mop & MO_BSWAP) {
618
+ val = bswap128(val);
619
+ }
620
+ do_st16_he_mmu(env, addr, val, mop, ra);
621
+}
622
+
623
+void helper_st_i128(CPUArchState *env, target_ulong addr,
624
+ Int128 val, MemOpIdx oi)
625
+{
626
+ helper_st16_mmu(env, addr, val, oi, GETPC());
627
+}
628
+
629
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
630
+ Int128 val, MemOpIdx oi, uintptr_t ra)
631
+{
632
+ MemOp mop = get_memop(oi);
633
+
634
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
635
if (!HOST_BIG_ENDIAN) {
636
val = bswap128(val);
637
}
638
- memcpy(haddr, &val, 16);
639
- clear_helper_retaddr();
640
+ do_st16_he_mmu(env, addr, val, mop, ra);
641
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
642
}
643
644
void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
645
Int128 val, MemOpIdx oi, uintptr_t ra)
646
{
647
- void *haddr;
648
+ MemOp mop = get_memop(oi);
649
650
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
651
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
652
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
653
if (HOST_BIG_ENDIAN) {
654
val = bswap128(val);
655
}
656
- memcpy(haddr, &val, 16);
657
- clear_helper_retaddr();
658
+ do_st16_he_mmu(env, addr, val, mop, ra);
659
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
660
}
661
662
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
663
index XXXXXXX..XXXXXXX 100644
664
--- a/tcg/tcg-op.c
665
+++ b/tcg/tcg-op.c
666
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
667
}
668
}
75
669
76
+/*
670
+/*
77
+ * Return a ram_addr_t for the virtual address for execution.
671
+ * Return true if @mop, without knowledge of the pointer alignment,
672
+ * does not require 16-byte atomicity, and it would be advantageous
673
+ * to avoid a call to a helper function.
674
+ */
675
+static bool use_two_i64_for_i128(MemOp mop)
676
+{
677
+#ifdef CONFIG_SOFTMMU
678
+ /* Two softmmu tlb lookups is larger than one function call. */
679
+ return false;
680
+#else
681
+ /*
682
+ * For user-only, two 64-bit operations may well be smaller than a call.
683
+ * Determine if that would be legal for the requested atomicity.
684
+ */
685
+ switch (mop & MO_ATOM_MASK) {
686
+ case MO_ATOM_NONE:
687
+ case MO_ATOM_IFALIGN_PAIR:
688
+ return true;
689
+ case MO_ATOM_IFALIGN:
690
+ case MO_ATOM_SUBALIGN:
691
+ case MO_ATOM_WITHIN16:
692
+ case MO_ATOM_WITHIN16_PAIR:
693
+ /* In a serialized context, no atomicity is required. */
694
+ return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
695
+ default:
696
+ g_assert_not_reached();
697
+ }
698
+#endif
699
+}
700
+
701
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
702
{
703
MemOp mop_1 = orig, mop_2;
704
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
705
ret[1] = mop_2;
706
}
707
708
+#if TARGET_LONG_BITS == 64
709
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
710
+#else
711
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
712
+#endif
713
+
714
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
715
{
716
- MemOp mop[2];
717
- TCGv addr_p8;
718
- TCGv_i64 x, y;
719
+ MemOpIdx oi = make_memop_idx(memop, idx);
720
721
- canonicalize_memop_i128_as_i64(mop, memop);
722
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
723
+ tcg_debug_assert((memop & MO_SIGN) == 0);
724
725
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
726
addr = plugin_prep_mem_callbacks(addr);
727
728
- /* TODO: respect atomicity of the operation. */
729
/* TODO: allow the tcg backend to see the whole operation. */
730
731
- /*
732
- * Since there are no global TCGv_i128, there is no visible state
733
- * changed if the second load faults. Load directly into the two
734
- * subwords.
735
- */
736
- if ((memop & MO_BSWAP) == MO_LE) {
737
- x = TCGV128_LOW(val);
738
- y = TCGV128_HIGH(val);
739
+ if (use_two_i64_for_i128(memop)) {
740
+ MemOp mop[2];
741
+ TCGv addr_p8;
742
+ TCGv_i64 x, y;
743
+
744
+ canonicalize_memop_i128_as_i64(mop, memop);
745
+
746
+ /*
747
+ * Since there are no global TCGv_i128, there is no visible state
748
+ * changed if the second load faults. Load directly into the two
749
+ * subwords.
750
+ */
751
+ if ((memop & MO_BSWAP) == MO_LE) {
752
+ x = TCGV128_LOW(val);
753
+ y = TCGV128_HIGH(val);
754
+ } else {
755
+ x = TCGV128_HIGH(val);
756
+ y = TCGV128_LOW(val);
757
+ }
758
+
759
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
760
+
761
+ if ((mop[0] ^ memop) & MO_BSWAP) {
762
+ tcg_gen_bswap64_i64(x, x);
763
+ }
764
+
765
+ addr_p8 = tcg_temp_ebb_new();
766
+ tcg_gen_addi_tl(addr_p8, addr, 8);
767
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
768
+ tcg_temp_free(addr_p8);
769
+
770
+ if ((mop[0] ^ memop) & MO_BSWAP) {
771
+ tcg_gen_bswap64_i64(y, y);
772
+ }
773
} else {
774
- x = TCGV128_HIGH(val);
775
- y = TCGV128_LOW(val);
776
+ gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
777
}
778
779
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
780
-
781
- if ((mop[0] ^ memop) & MO_BSWAP) {
782
- tcg_gen_bswap64_i64(x, x);
783
- }
784
-
785
- addr_p8 = tcg_temp_new();
786
- tcg_gen_addi_tl(addr_p8, addr, 8);
787
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
788
- tcg_temp_free(addr_p8);
789
-
790
- if ((mop[0] ^ memop) & MO_BSWAP) {
791
- tcg_gen_bswap64_i64(y, y);
792
- }
793
-
794
- plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
795
- QEMU_PLUGIN_MEM_R);
796
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
797
}
798
799
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
800
{
801
- MemOp mop[2];
802
- TCGv addr_p8;
803
- TCGv_i64 x, y;
804
+ MemOpIdx oi = make_memop_idx(memop, idx);
805
806
- canonicalize_memop_i128_as_i64(mop, memop);
807
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
808
+ tcg_debug_assert((memop & MO_SIGN) == 0);
809
810
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
811
addr = plugin_prep_mem_callbacks(addr);
812
813
- /* TODO: respect atomicity of the operation. */
814
/* TODO: allow the tcg backend to see the whole operation. */
815
816
- if ((memop & MO_BSWAP) == MO_LE) {
817
- x = TCGV128_LOW(val);
818
- y = TCGV128_HIGH(val);
819
+ if (use_two_i64_for_i128(memop)) {
820
+ MemOp mop[2];
821
+ TCGv addr_p8;
822
+ TCGv_i64 x, y;
823
+
824
+ canonicalize_memop_i128_as_i64(mop, memop);
825
+
826
+ if ((memop & MO_BSWAP) == MO_LE) {
827
+ x = TCGV128_LOW(val);
828
+ y = TCGV128_HIGH(val);
829
+ } else {
830
+ x = TCGV128_HIGH(val);
831
+ y = TCGV128_LOW(val);
832
+ }
833
+
834
+ addr_p8 = tcg_temp_ebb_new();
835
+ if ((mop[0] ^ memop) & MO_BSWAP) {
836
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
837
+
838
+ tcg_gen_bswap64_i64(t, x);
839
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
840
+ tcg_gen_bswap64_i64(t, y);
841
+ tcg_gen_addi_tl(addr_p8, addr, 8);
842
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
843
+ tcg_temp_free_i64(t);
844
+ } else {
845
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
846
+ tcg_gen_addi_tl(addr_p8, addr, 8);
847
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
848
+ }
849
+ tcg_temp_free(addr_p8);
850
} else {
851
- x = TCGV128_HIGH(val);
852
- y = TCGV128_LOW(val);
853
+ gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
854
}
855
856
- addr_p8 = tcg_temp_new();
857
- if ((mop[0] ^ memop) & MO_BSWAP) {
858
- TCGv_i64 t = tcg_temp_ebb_new_i64();
859
-
860
- tcg_gen_bswap64_i64(t, x);
861
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
862
- tcg_gen_bswap64_i64(t, y);
863
- tcg_gen_addi_tl(addr_p8, addr, 8);
864
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
865
- tcg_temp_free_i64(t);
866
- } else {
867
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
868
- tcg_gen_addi_tl(addr_p8, addr, 8);
869
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
870
- }
871
- tcg_temp_free(addr_p8);
872
-
873
- plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
874
- QEMU_PLUGIN_MEM_W);
875
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
876
}
877
878
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
879
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
880
index XXXXXXX..XXXXXXX 100644
881
--- a/accel/tcg/ldst_atomicity.c.inc
882
+++ b/accel/tcg/ldst_atomicity.c.inc
883
@@ -XXX,XX +XXX,XX @@ static inline uint64_t load_atom_8_by_4(void *pv)
884
}
885
}
886
887
+/**
888
+ * load_atom_8_by_8_or_4:
889
+ * @pv: host address
78
+ *
890
+ *
79
+ * Return -1 if we can't translate and execute from an entire page
891
+ * Load 8 bytes from aligned @pv, with at least 4-byte atomicity.
80
+ * of RAM. This will force us to execute by loading and translating
892
+ */
81
+ * one insn at a time, without caching.
893
+static inline uint64_t load_atom_8_by_8_or_4(void *pv)
894
+{
895
+ if (HAVE_al8_fast) {
896
+ return load_atomic8(pv);
897
+ } else {
898
+ return load_atom_8_by_4(pv);
899
+ }
900
+}
901
+
902
/**
903
* load_atom_2:
904
* @p: host address
905
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
906
}
907
}
908
909
+/**
910
+ * load_atom_16:
911
+ * @p: host address
912
+ * @memop: the full memory op
82
+ *
913
+ *
83
+ * NOTE: This function will trigger an exception if the page is
914
+ * Load 16 bytes from @p, honoring the atomicity of @memop.
84
+ * not executable.
85
+ */
915
+ */
86
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
916
+static Int128 load_atom_16(CPUArchState *env, uintptr_t ra,
87
+ void **hostp)
917
+ void *pv, MemOp memop)
88
+{
918
+{
89
+ void *p;
919
+ uintptr_t pi = (uintptr_t)pv;
90
+
920
+ int atmax;
91
+ (void)probe_access_internal(env, addr, 1, MMU_INST_FETCH,
921
+ Int128 r;
92
+ cpu_mmu_index(env, true), false, &p, 0);
922
+ uint64_t a, b;
93
+ if (p == NULL) {
923
+
94
+ return -1;
924
+ /*
95
+ }
925
+ * If the host does not support 16-byte atomics, wait until we have
96
+ if (hostp) {
926
+ * examined the atomicity parameters below.
97
+ *hostp = p;
927
+ */
98
+ }
928
+ if (HAVE_al16_fast && likely((pi & 15) == 0)) {
99
+ return qemu_ram_addr_from_host_nofail(p);
929
+ return load_atomic16(pv);
100
+}
930
+ }
101
+
931
+
102
#ifdef CONFIG_PLUGIN
932
+ atmax = required_atomicity(env, pi, memop);
103
/*
933
+ switch (atmax) {
104
* Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
934
+ case MO_8:
935
+ memcpy(&r, pv, 16);
936
+ return r;
937
+ case MO_16:
938
+ a = load_atom_8_by_2(pv);
939
+ b = load_atom_8_by_2(pv + 8);
940
+ break;
941
+ case MO_32:
942
+ a = load_atom_8_by_4(pv);
943
+ b = load_atom_8_by_4(pv + 8);
944
+ break;
945
+ case MO_64:
946
+ if (!HAVE_al8) {
947
+ cpu_loop_exit_atomic(env_cpu(env), ra);
948
+ }
949
+ a = load_atomic8(pv);
950
+ b = load_atomic8(pv + 8);
951
+ break;
952
+ case -MO_64:
953
+ if (!HAVE_al8) {
954
+ cpu_loop_exit_atomic(env_cpu(env), ra);
955
+ }
956
+ a = load_atom_extract_al8x2(pv);
957
+ b = load_atom_extract_al8x2(pv + 8);
958
+ break;
959
+ case MO_128:
960
+ return load_atomic16_or_exit(env, ra, pv);
961
+ default:
962
+ g_assert_not_reached();
963
+ }
964
+ return int128_make128(HOST_BIG_ENDIAN ? b : a, HOST_BIG_ENDIAN ? a : b);
965
+}
966
+
967
/**
968
* store_atomic2:
969
* @pv: host address
970
@@ -XXX,XX +XXX,XX @@ static inline void store_atomic8(void *pv, uint64_t val)
971
qatomic_set__nocheck(p, val);
972
}
973
974
+/**
975
+ * store_atomic16:
976
+ * @pv: host address
977
+ * @val: value to store
978
+ *
979
+ * Atomically store 16 aligned bytes to @pv.
980
+ */
981
+static inline void store_atomic16(void *pv, Int128Alias val)
982
+{
983
+#if defined(CONFIG_ATOMIC128)
984
+ __uint128_t *pu = __builtin_assume_aligned(pv, 16);
985
+ qatomic_set__nocheck(pu, val.u);
986
+#elif defined(CONFIG_CMPXCHG128)
987
+ __uint128_t *pu = __builtin_assume_aligned(pv, 16);
988
+ __uint128_t o;
989
+
990
+ /*
991
+ * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
992
+ * defer to libatomic, so we must use __sync_*_compare_and_swap_16
993
+ * and accept the sequential consistency that comes with it.
994
+ */
995
+ do {
996
+ o = *pu;
997
+ } while (!__sync_bool_compare_and_swap_16(pu, o, val.u));
998
+#else
999
+ qemu_build_not_reached();
1000
+#endif
1001
+}
1002
+
1003
/**
1004
* store_atom_4x2
1005
*/
1006
@@ -XXX,XX +XXX,XX @@ static void store_atom_8(CPUArchState *env, uintptr_t ra,
1007
}
1008
cpu_loop_exit_atomic(env_cpu(env), ra);
1009
}
1010
+
1011
+/**
1012
+ * store_atom_16:
1013
+ * @p: host address
1014
+ * @val: the value to store
1015
+ * @memop: the full memory op
1016
+ *
1017
+ * Store 16 bytes to @p, honoring the atomicity of @memop.
1018
+ */
1019
+static void store_atom_16(CPUArchState *env, uintptr_t ra,
1020
+ void *pv, MemOp memop, Int128 val)
1021
+{
1022
+ uintptr_t pi = (uintptr_t)pv;
1023
+ uint64_t a, b;
1024
+ int atmax;
1025
+
1026
+ if (HAVE_al16_fast && likely((pi & 15) == 0)) {
1027
+ store_atomic16(pv, val);
1028
+ return;
1029
+ }
1030
+
1031
+ atmax = required_atomicity(env, pi, memop);
1032
+
1033
+ a = HOST_BIG_ENDIAN ? int128_gethi(val) : int128_getlo(val);
1034
+ b = HOST_BIG_ENDIAN ? int128_getlo(val) : int128_gethi(val);
1035
+ switch (atmax) {
1036
+ case MO_8:
1037
+ memcpy(pv, &val, 16);
1038
+ return;
1039
+ case MO_16:
1040
+ store_atom_8_by_2(pv, a);
1041
+ store_atom_8_by_2(pv + 8, b);
1042
+ return;
1043
+ case MO_32:
1044
+ store_atom_8_by_4(pv, a);
1045
+ store_atom_8_by_4(pv + 8, b);
1046
+ return;
1047
+ case MO_64:
1048
+ if (HAVE_al8) {
1049
+ store_atomic8(pv, a);
1050
+ store_atomic8(pv + 8, b);
1051
+ return;
1052
+ }
1053
+ break;
1054
+ case -MO_64:
1055
+ if (HAVE_al16) {
1056
+ uint64_t val_le;
1057
+ int s2 = pi & 15;
1058
+ int s1 = 16 - s2;
1059
+
1060
+ if (HOST_BIG_ENDIAN) {
1061
+ val = bswap128(val);
1062
+ }
1063
+ switch (s2) {
1064
+ case 1 ... 7:
1065
+ val_le = store_whole_le16(pv, s1, val);
1066
+ store_bytes_leN(pv + s1, s2, val_le);
1067
+ break;
1068
+ case 9 ... 15:
1069
+ store_bytes_leN(pv, s1, int128_getlo(val));
1070
+ val = int128_urshift(val, s1 * 8);
1071
+ store_whole_le16(pv + s1, s2, val);
1072
+ break;
1073
+ case 0: /* aligned */
1074
+ case 8: /* atmax MO_64 */
1075
+ default:
1076
+ g_assert_not_reached();
1077
+ }
1078
+ return;
1079
+ }
1080
+ break;
1081
+ case MO_128:
1082
+ if (HAVE_al16) {
1083
+ store_atomic16(pv, val);
1084
+ return;
1085
+ }
1086
+ break;
1087
+ default:
1088
+ g_assert_not_reached();
1089
+ }
1090
+ cpu_loop_exit_atomic(env_cpu(env), ra);
1091
+}
105
--
2.34.1
1
There is an edge condition prior to gcc13 for which optimization
2
is required to generate 16-byte atomic sequences. Detect this.
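
For reference, a minimal standalone sketch (not part of the patch) of the
construct the new meson probe looks for; the function name is invented, and
whether this compiles to an inline 16-byte atomic rather than a libatomic
call depends on the host and compiler version, which is exactly what the
check below has to detect:

    /* Sketch only: per-function optimization plus forced 16-byte
     * alignment, as probed by the meson test in this patch. */
    __attribute__((optimize("O1")))
    static unsigned __int128 load_16_atomic(void *pv)
    {
        unsigned __int128 *p = __builtin_assume_aligned(pv, 16);
        return __atomic_load_n(p, __ATOMIC_RELAXED);
    }

    int main(void)
    {
        _Alignas(16) unsigned __int128 x = 0;
        return (int)load_16_atomic(&x);
    }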
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
meson.build | 52 ++++++++++++++++++++++------------
8
accel/tcg/ldst_atomicity.c.inc | 29 ++++++++++++++++---
9
2 files changed, 59 insertions(+), 22 deletions(-)
10
11
diff --git a/meson.build b/meson.build
12
index XXXXXXX..XXXXXXX 100644
13
--- a/meson.build
14
+++ b/meson.build
15
@@ -XXX,XX +XXX,XX @@ config_host_data.set('HAVE_BROKEN_SIZE_MAX', not cc.compiles('''
16
return printf("%zu", SIZE_MAX);
17
}''', args: ['-Werror']))
18
19
-atomic_test = '''
20
+# See if 64-bit atomic operations are supported.
21
+# Note that without __atomic builtins, we can only
22
+# assume atomic loads/stores max at pointer size.
23
+config_host_data.set('CONFIG_ATOMIC64', cc.links('''
24
#include <stdint.h>
25
int main(void)
26
{
27
- @0@ x = 0, y = 0;
28
+ uint64_t x = 0, y = 0;
29
y = __atomic_load_n(&x, __ATOMIC_RELAXED);
30
__atomic_store_n(&x, y, __ATOMIC_RELAXED);
31
__atomic_compare_exchange_n(&x, &y, x, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
32
__atomic_exchange_n(&x, y, __ATOMIC_RELAXED);
33
__atomic_fetch_add(&x, y, __ATOMIC_RELAXED);
34
return 0;
35
- }'''
36
-
37
-# See if 64-bit atomic operations are supported.
38
-# Note that without __atomic builtins, we can only
39
-# assume atomic loads/stores max at pointer size.
40
-config_host_data.set('CONFIG_ATOMIC64', cc.links(atomic_test.format('uint64_t')))
41
+ }'''))
42
43
has_int128 = cc.links('''
44
__int128_t a;
45
@@ -XXX,XX +XXX,XX @@ if has_int128
46
# "do we have 128-bit atomics which are handled inline and specifically not
47
# via libatomic". The reason we can't use libatomic is documented in the
48
# comment starting "GCC is a house divided" in include/qemu/atomic128.h.
49
- has_atomic128 = cc.links(atomic_test.format('unsigned __int128'))
50
+ # We only care about these operations on 16-byte aligned pointers, so
51
+ # force 16-byte alignment of the pointer, which may be greater than
52
+ # __alignof(unsigned __int128) for the host.
53
+ atomic_test_128 = '''
54
+ int main(int ac, char **av) {
55
+ unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16);
56
+ p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
57
+ __atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
58
+ __atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
59
+ return 0;
60
+ }'''
61
+ has_atomic128 = cc.links(atomic_test_128)
62
63
config_host_data.set('CONFIG_ATOMIC128', has_atomic128)
64
65
if not has_atomic128
66
- has_cmpxchg128 = cc.links('''
67
- int main(void)
68
- {
69
- unsigned __int128 x = 0, y = 0;
70
- __sync_val_compare_and_swap_16(&x, y, x);
71
- return 0;
72
- }
73
- ''')
74
+ # Even with __builtin_assume_aligned, the above test may have failed
75
+ # without optimization enabled. Try again with optimizations locally
76
+ # enabled for the function. See
77
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
78
+ has_atomic128_opt = cc.links('__attribute__((optimize("O1")))' + atomic_test_128)
79
+ config_host_data.set('CONFIG_ATOMIC128_OPT', has_atomic128_opt)
80
81
- config_host_data.set('CONFIG_CMPXCHG128', has_cmpxchg128)
82
+ if not has_atomic128_opt
83
+ config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
84
+ int main(void)
85
+ {
86
+ unsigned __int128 x = 0, y = 0;
87
+ __sync_val_compare_and_swap_16(&x, y, x);
88
+ return 0;
89
+ }
90
+ '''))
91
+ endif
92
endif
93
endif
94
95
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
96
index XXXXXXX..XXXXXXX 100644
97
--- a/accel/tcg/ldst_atomicity.c.inc
98
+++ b/accel/tcg/ldst_atomicity.c.inc
99
@@ -XXX,XX +XXX,XX @@
100
#endif
101
#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
102
103
+/*
104
+ * If __alignof(unsigned __int128) < 16, GCC may refuse to inline atomics
105
+ * that are supported by the host, e.g. s390x. We can force the pointer to
106
+ * have our known alignment with __builtin_assume_aligned, however prior to
107
+ * GCC 13 that was only reliable with optimization enabled. See
108
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
109
+ */
110
+#if defined(CONFIG_ATOMIC128_OPT)
111
+# if !defined(__OPTIMIZE__)
112
+# define ATTRIBUTE_ATOMIC128_OPT __attribute__((optimize("O1")))
113
+# endif
114
+# define CONFIG_ATOMIC128
115
+#endif
116
+#ifndef ATTRIBUTE_ATOMIC128_OPT
117
+# define ATTRIBUTE_ATOMIC128_OPT
118
+#endif
119
+
120
#if defined(CONFIG_ATOMIC128)
121
# define HAVE_al16_fast true
122
#else
123
@@ -XXX,XX +XXX,XX @@ static inline uint64_t load_atomic8(void *pv)
124
*
125
* Atomically load 16 aligned bytes from @pv.
126
*/
127
-static inline Int128 load_atomic16(void *pv)
128
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
129
+load_atomic16(void *pv)
130
{
131
#ifdef CONFIG_ATOMIC128
132
__uint128_t *p = __builtin_assume_aligned(pv, 16);
133
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
134
* cross an 16-byte boundary then the access must be 16-byte atomic,
135
* otherwise the access must be 8-byte atomic.
136
*/
137
-static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
138
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
139
+load_atom_extract_al16_or_al8(void *pv, int s)
140
{
141
#if defined(CONFIG_ATOMIC128)
142
uintptr_t pi = (uintptr_t)pv;
143
@@ -XXX,XX +XXX,XX @@ static inline void store_atomic8(void *pv, uint64_t val)
144
*
145
* Atomically store 16 aligned bytes to @pv.
146
*/
147
-static inline void store_atomic16(void *pv, Int128Alias val)
148
+static inline void ATTRIBUTE_ATOMIC128_OPT
149
+store_atomic16(void *pv, Int128Alias val)
150
{
151
#if defined(CONFIG_ATOMIC128)
152
__uint128_t *pu = __builtin_assume_aligned(pv, 16);
153
@@ -XXX,XX +XXX,XX @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
154
*
155
* Atomically store @val to @p masked by @msk.
156
*/
157
-static void store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
158
+static void ATTRIBUTE_ATOMIC128_OPT
159
+store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
160
{
161
#if defined(CONFIG_ATOMIC128)
162
__uint128_t *pu, old, new;
163
--
164
2.34.1
1
Notice when Intel or AMD have guaranteed that vmovdqa is atomic.
2
The new variable will also be used in generated code.
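
As an aside, a hedged sketch (not the patch itself) of the shape of this
detection using GCC's <cpuid.h>; the real tcg_target_init only does the
vendor comparison after the existing have_avx1 checks have passed:

    #include <cpuid.h>
    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned a, b, c, d;
        bool atomic16 = false;

        if (__get_cpuid(1, &a, &b, &c, &d) && (c & bit_AVX)) {
            __get_cpuid(0, &a, &b, &c, &d);
            /* leaf 0 ecx: "ntel" for GenuineIntel, "cAMD" for AuthenticAMD */
            atomic16 = (c == 0x6c65746e || c == 0x444d4163);
        }
        printf("assume 16-byte vmovdqa atomicity: %d\n", atomic16);
        return 0;
    }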
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/qemu/cpuid.h | 18 ++++++++++++++++++
8
tcg/i386/tcg-target.h | 1 +
9
tcg/i386/tcg-target.c.inc | 27 +++++++++++++++++++++++++++
10
3 files changed, 46 insertions(+)
11
12
diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/qemu/cpuid.h
15
+++ b/include/qemu/cpuid.h
16
@@ -XXX,XX +XXX,XX @@
17
#define bit_LZCNT (1 << 5)
18
#endif
19
20
+/*
21
+ * Signatures for different CPU implementations as returned from Leaf 0.
22
+ */
23
+
24
+#ifndef signature_INTEL_ecx
25
+/* "Genu" "ineI" "ntel" */
26
+#define signature_INTEL_ebx 0x756e6547
27
+#define signature_INTEL_edx 0x49656e69
28
+#define signature_INTEL_ecx 0x6c65746e
29
+#endif
30
+
31
+#ifndef signature_AMD_ecx
32
+/* "Auth" "enti" "cAMD" */
33
+#define signature_AMD_ebx 0x68747541
34
+#define signature_AMD_edx 0x69746e65
35
+#define signature_AMD_ecx 0x444d4163
36
+#endif
37
+
38
static inline unsigned xgetbv_low(unsigned c)
39
{
40
unsigned a, d;
41
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/tcg/i386/tcg-target.h
44
+++ b/tcg/i386/tcg-target.h
45
@@ -XXX,XX +XXX,XX @@ extern bool have_avx512dq;
46
extern bool have_avx512vbmi2;
47
extern bool have_avx512vl;
48
extern bool have_movbe;
49
+extern bool have_atomic16;
50
51
/* optional instructions */
52
#define TCG_TARGET_HAS_div2_i32 1
53
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
54
index XXXXXXX..XXXXXXX 100644
55
--- a/tcg/i386/tcg-target.c.inc
56
+++ b/tcg/i386/tcg-target.c.inc
57
@@ -XXX,XX +XXX,XX @@ bool have_avx512dq;
58
bool have_avx512vbmi2;
59
bool have_avx512vl;
60
bool have_movbe;
61
+bool have_atomic16;
62
63
#ifdef CONFIG_CPUID_H
64
static bool have_bmi2;
65
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
66
have_avx512dq = (b7 & bit_AVX512DQ) != 0;
67
have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
68
}
69
+
70
+ /*
71
+ * The Intel SDM has added:
72
+ * Processors that enumerate support for Intel® AVX
73
+ * (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
74
+ * guarantee that the 16-byte memory operations performed
75
+ * by the following instructions will always be carried
76
+ * out atomically:
77
+ * - MOVAPD, MOVAPS, and MOVDQA.
78
+ * - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
79
+ * - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
80
+ * with EVEX.128 and k0 (masking disabled).
81
+ * Note that these instructions require the linear addresses
82
+ * of their memory operands to be 16-byte aligned.
83
+ *
84
+ * AMD has provided an even stronger guarantee that processors
85
+ * with AVX provide 16-byte atomicity for all cacheable,
86
+ * naturally aligned single loads and stores, e.g. MOVDQU.
87
+ *
88
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
89
+ */
90
+ if (have_avx1) {
91
+ __cpuid(0, a, b, c, d);
92
+ have_atomic16 = (c == signature_INTEL_ecx ||
93
+ c == signature_AMD_ecx);
94
+ }
95
}
96
}
97
}
98
--
99
2.34.1
100
101
1
Notice when the host has additional atomic instructions.
2
The new variables will also be used in generated code.
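
For illustration only, the same probe as a standalone program on a
Linux/aarch64 host; the patch itself uses qemu_getauxval rather than
getauxval, but the hwcap bits are the same:

    #include <stdio.h>
    #include <sys/auxv.h>
    #include <asm/hwcap.h>

    int main(void)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);

        printf("FEAT_LSE  (HWCAP_ATOMICS): %d\n", (hwcap & HWCAP_ATOMICS) != 0);
        printf("FEAT_LSE2 (HWCAP_USCAT):   %d\n", (hwcap & HWCAP_USCAT) != 0);
        return 0;
    }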
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target.h | 3 +++
9
tcg/aarch64/tcg-target.c.inc | 12 ++++++++++++
10
2 files changed, 15 insertions(+)
11
12
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/aarch64/tcg-target.h
15
+++ b/tcg/aarch64/tcg-target.h
16
@@ -XXX,XX +XXX,XX @@ typedef enum {
17
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
18
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
19
20
+extern bool have_lse;
21
+extern bool have_lse2;
22
+
23
/* optional instructions */
24
#define TCG_TARGET_HAS_div_i32 1
25
#define TCG_TARGET_HAS_rem_i32 1
26
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
27
index XXXXXXX..XXXXXXX 100644
28
--- a/tcg/aarch64/tcg-target.c.inc
29
+++ b/tcg/aarch64/tcg-target.c.inc
30
@@ -XXX,XX +XXX,XX @@
31
#include "../tcg-ldst.c.inc"
32
#include "../tcg-pool.c.inc"
33
#include "qemu/bitops.h"
34
+#ifdef __linux__
35
+#include <asm/hwcap.h>
36
+#endif
37
38
/* We're going to re-use TCGType in setting of the SF bit, which controls
39
the size of the operation performed. If we know the values match, it
40
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
41
return TCG_REG_X0 + slot;
42
}
43
44
+bool have_lse;
45
+bool have_lse2;
46
+
47
#define TCG_REG_TMP TCG_REG_X30
48
#define TCG_VEC_TMP TCG_REG_V31
49
50
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
51
52
static void tcg_target_init(TCGContext *s)
53
{
54
+#ifdef __linux__
55
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
56
+ have_lse = hwcap & HWCAP_ATOMICS;
57
+ have_lse2 = hwcap & HWCAP_USCAT;
58
+#endif
59
+
60
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
61
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
62
tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
63
--
64
2.34.1
65
66
1
We're about to start validating PAGE_EXEC, which means that we've
1
These features are present for Apple M1.
2
got to mark the vsyscall page executable. We had been special
3
casing this entirely within translate.
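
For the Darwin detection added below, an illustrative standalone sketch of
the same sysctl queries; the sysctl names are the ones used by the patch,
while the helper name here is made up:

    #include <stdio.h>
    #include <sys/sysctl.h>

    static int have_feature(const char *name)
    {
        int val = 0;
        size_t len = sizeof(val);

        /* Missing property: treat as "not present". */
        if (sysctlbyname(name, &val, &len, NULL, 0) != 0) {
            return 0;
        }
        return val != 0;
    }

    int main(void)
    {
        printf("FEAT_LSE:  %d\n", have_feature("hw.optional.arm.FEAT_LSE"));
        printf("FEAT_LSE2: %d\n", have_feature("hw.optional.arm.FEAT_LSE2"));
        return 0;
    }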
4
2
5
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
3
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
linux-user/elfload.c | 23 +++++++++++++++++++++++
8
tcg/aarch64/tcg-target.c.inc | 28 ++++++++++++++++++++++++++++
10
1 file changed, 23 insertions(+)
9
1 file changed, 28 insertions(+)
11
10
12
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/linux-user/elfload.c
13
--- a/tcg/aarch64/tcg-target.c.inc
15
+++ b/linux-user/elfload.c
14
+++ b/tcg/aarch64/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en
15
@@ -XXX,XX +XXX,XX @@
17
(*regs)[26] = tswapreg(env->segs[R_GS].selector & 0xffff);
16
#ifdef __linux__
17
#include <asm/hwcap.h>
18
#endif
19
+#ifdef CONFIG_DARWIN
20
+#include <sys/sysctl.h>
21
+#endif
22
23
/* We're going to re-use TCGType in setting of the SF bit, which controls
24
the size of the operation performed. If we know the values match, it
25
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
26
}
18
}
27
}
19
28
20
+#if ULONG_MAX >= TARGET_VSYSCALL_PAGE
29
+#ifdef CONFIG_DARWIN
21
+#define INIT_GUEST_COMMPAGE
30
+static bool sysctl_for_bool(const char *name)
22
+static bool init_guest_commpage(void)
23
+{
31
+{
32
+ int val = 0;
33
+ size_t len = sizeof(val);
34
+
35
+ if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
36
+ return val != 0;
37
+ }
38
+
24
+ /*
39
+ /*
25
+ * The vsyscall page is at a high negative address aka kernel space,
40
+ * We might in the future ask for properties not present in older kernels,
26
+ * which means that we cannot actually allocate it with target_mmap.
41
+ * but we're only asking about static properties, all of which should be
27
+ * We still should be able to use page_set_flags, unless the user
42
+ * 'int'. So we shouldn't see ENOMEM (val too small), or any of the other
28
+ * has specified -R reserved_va, which would trigger an assert().
43
+ * more exotic errors.
29
+ */
44
+ */
30
+ if (reserved_va != 0 &&
45
+ assert(errno == ENOENT);
31
+ TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE >= reserved_va) {
46
+ return false;
32
+ error_report("Cannot allocate vsyscall page");
33
+ exit(EXIT_FAILURE);
34
+ }
35
+ page_set_flags(TARGET_VSYSCALL_PAGE,
36
+ TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE,
37
+ PAGE_EXEC | PAGE_VALID);
38
+ return true;
39
+}
47
+}
40
+#endif
48
+#endif
41
#else
49
+
42
50
static void tcg_target_init(TCGContext *s)
43
#define ELF_START_MMAP 0x80000000
51
{
44
@@ -XXX,XX +XXX,XX @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
52
#ifdef __linux__
45
#else
53
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
46
#define HI_COMMPAGE 0
54
have_lse = hwcap & HWCAP_ATOMICS;
47
#define LO_COMMPAGE -1
55
have_lse2 = hwcap & HWCAP_USCAT;
48
+#ifndef INIT_GUEST_COMMPAGE
49
#define init_guest_commpage() true
50
#endif
56
#endif
57
+#ifdef CONFIG_DARWIN
58
+ have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE");
59
+ have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2");
51
+#endif
60
+#endif
52
61
53
static void pgb_fail_in_use(const char *image_name)
62
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
54
{
63
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
55
--
2.34.1
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
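
To illustrate the idea (hypothetical names, not QEMU APIs): the fast path
keeps requiring natural alignment so a single atomic host access can be
used, while the slow path now performs the access itself instead of
unconditionally raising the unaligned-access exception:

    #include <stdint.h>
    #include <string.h>

    /* Sketch of the slow path: handle the access piecewise and only
     * fault if the guest-visible alignment requirement is violated. */
    static uint64_t slow_path_ld8(const void *haddr, unsigned a_mask)
    {
        uint64_t val;

        if ((uintptr_t)haddr & a_mask) {
            /* a real implementation raises the guest exception here */
            return 0;
        }
        memcpy(&val, haddr, sizeof(val));   /* non-atomic, byte-wise ok */
        return val;
    }

    static uint64_t ld8(const void *haddr, unsigned a_mask)
    {
        if (((uintptr_t)haddr & 7) == 0) {
            /* fast path: aligned enough for one atomic host load */
            return __atomic_load_n((const uint64_t *)haddr, __ATOMIC_RELAXED);
        }
        return slow_path_ld8(haddr, a_mask);
    }

    int main(void)
    {
        uint64_t buf[2] = { 0x0123456789abcdefull, 0 };
        return (int)(ld8(buf, 7) & 0xff);
    }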
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 52 +++------------------------------------
9
1 file changed, 4 insertions(+), 48 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
int seg;
17
} HostAddress;
18
19
-#if defined(CONFIG_SOFTMMU)
20
/*
21
* Because i686 has no register parameters and because x86_64 has xchg
22
* to handle addr/data register overlap, we have placed all input arguments
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
25
/* resolve label address */
26
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
27
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
28
+ if (label_ptr[1]) {
29
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
30
}
31
32
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
33
34
/* resolve label address */
35
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
36
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
37
+ if (label_ptr[1]) {
38
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
39
}
40
41
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
42
tcg_out_jmp(s, l->raddr);
43
return true;
44
}
45
-#else
46
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
47
-{
48
- /* resolve label address */
49
- tcg_patch32(l->label_ptr[0], s->code_ptr - l->label_ptr[0] - 4);
50
-
51
- if (TCG_TARGET_REG_BITS == 32) {
52
- int ofs = 0;
53
-
54
- tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
55
- ofs += 4;
56
-
57
- tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
58
- ofs += 4;
59
- if (TARGET_LONG_BITS == 64) {
60
- tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
61
- ofs += 4;
62
- }
63
-
64
- tcg_out_pushi(s, (uintptr_t)l->raddr);
65
- } else {
66
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
67
- l->addrlo_reg);
68
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
69
-
70
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, (uintptr_t)l->raddr);
71
- tcg_out_push(s, TCG_REG_RAX);
72
- }
73
-
74
- /* "Tail call" to the helper, with the return address back inline. */
75
- tcg_out_jmp(s, (const void *)(l->is_ld ? helper_unaligned_ld
76
- : helper_unaligned_st));
77
- return true;
78
-}
79
-
80
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
81
-{
82
- return tcg_out_fail_alignment(s, l);
83
-}
84
-
85
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
86
-{
87
- return tcg_out_fail_alignment(s, l);
88
-}
89
90
+#ifndef CONFIG_SOFTMMU
91
static HostAddress x86_guest_base = {
92
.index = -1
93
};
94
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
95
return 0;
96
}
97
#endif /* setup_guest_base_seg */
98
-#endif /* SOFTMMU */
99
+#endif /* !SOFTMMU */
100
101
/*
102
* For softmmu, perform the TLB load and compare.
103
--
104
2.34.1
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target.c.inc | 35 -----------------------------------
9
1 file changed, 35 deletions(-)
10
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
TCGType index_ext;
17
} HostAddress;
18
19
-#ifdef CONFIG_SOFTMMU
20
static const TCGLdstHelperParam ldst_helper_param = {
21
.ntmp = 1, .tmp = { TCG_REG_TMP }
22
};
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tcg_out_goto(s, lb->raddr);
25
return true;
26
}
27
-#else
28
-static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
29
-{
30
- ptrdiff_t offset = tcg_pcrel_diff(s, target);
31
- tcg_debug_assert(offset == sextract64(offset, 0, 21));
32
- tcg_out_insn(s, 3406, ADR, rd, offset);
33
-}
34
-
35
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
36
-{
37
- if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
38
- return false;
39
- }
40
-
41
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
42
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
43
-
44
- /* "Tail call" to the helper, with the return address back inline. */
45
- tcg_out_adr(s, TCG_REG_LR, l->raddr);
46
- tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
47
- : helper_unaligned_st));
48
- return true;
49
-}
50
-
51
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
52
-{
53
- return tcg_out_fail_alignment(s, l);
54
-}
55
-
56
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
57
-{
58
- return tcg_out_fail_alignment(s, l);
59
-}
60
-#endif /* CONFIG_SOFTMMU */
61
62
/*
63
* For softmmu, perform the TLB load and compare.
64
--
65
2.34.1
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/ppc/tcg-target.c.inc | 44 ----------------------------------------
9
1 file changed, 44 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.c.inc
14
+++ b/tcg/ppc/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
16
[MO_BSWAP | MO_UQ] = STDBRX,
17
};
18
19
-#if defined (CONFIG_SOFTMMU)
20
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
21
{
22
if (arg < 0) {
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tcg_out_b(s, 0, lb->raddr);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
31
- return false;
32
- }
33
-
34
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
35
- TCGReg arg = TCG_REG_R4;
36
-
37
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
38
- if (l->addrlo_reg != arg) {
39
- tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
40
- tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
41
- } else if (l->addrhi_reg != arg + 1) {
42
- tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
43
- tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
44
- } else {
45
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg);
46
- tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1);
47
- tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0);
48
- }
49
- } else {
50
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg);
51
- }
52
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0);
53
-
54
- /* "Tail call" to the helper, with the return address back inline. */
55
- tcg_out_call_int(s, 0, (const void *)(l->is_ld ? helper_unaligned_ld
56
- : helper_unaligned_st));
57
- return true;
58
-}
59
-
60
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
61
-{
62
- return tcg_out_fail_alignment(s, l);
63
-}
64
-
65
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
66
-{
67
- return tcg_out_fail_alignment(s, l);
68
-}
69
-#endif /* SOFTMMU */
70
71
typedef struct {
72
TCGReg base;
73
--
74
2.34.1
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/loongarch64/tcg-target.c.inc | 30 ------------------------------
9
1 file changed, 30 deletions(-)
10
11
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/loongarch64/tcg-target.c.inc
14
+++ b/tcg/loongarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
16
* Load/store helpers for SoftMMU, and qemu_ld/st implementations
17
*/
18
19
-#if defined(CONFIG_SOFTMMU)
20
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
21
{
22
tcg_out_opc_b(s, 0);
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
25
return tcg_out_goto(s, l->raddr);
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- /* resolve label address */
31
- if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
32
- return false;
33
- }
34
-
35
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
37
-
38
- /* tail call, with the return address back inline. */
39
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr);
40
- tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld
41
- : helper_unaligned_st), true);
42
- return true;
43
-}
44
-
45
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
-{
47
- return tcg_out_fail_alignment(s, l);
48
-}
49
-
50
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
-{
52
- return tcg_out_fail_alignment(s, l);
53
-}
54
-
55
-#endif /* CONFIG_SOFTMMU */
56
57
typedef struct {
58
TCGReg base;
59
--
60
2.34.1
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/riscv/tcg-target.c.inc | 29 -----------------------------
9
1 file changed, 29 deletions(-)
10
11
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/riscv/tcg-target.c.inc
14
+++ b/tcg/riscv/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
16
* Load/store and TLB
17
*/
18
19
-#if defined(CONFIG_SOFTMMU)
20
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
21
{
22
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
tcg_out_goto(s, l->raddr);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- /* resolve label address */
31
- if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
32
- return false;
33
- }
34
-
35
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
37
-
38
- /* tail call, with the return address back inline. */
39
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr);
40
- tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld
41
- : helper_unaligned_st), true);
42
- return true;
43
-}
44
-
45
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
-{
47
- return tcg_out_fail_alignment(s, l);
48
-}
49
-
50
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
-{
52
- return tcg_out_fail_alignment(s, l);
53
-}
54
-#endif /* CONFIG_SOFTMMU */
55
56
/*
57
* For softmmu, perform the TLB load and compare.
58
--
59
2.34.1
1
Always reserve r3 for tlb softmmu lookup. Fix a bug in user-only
2
ALL_QLDST_REGS, in that r14 is clobbered by the BLNE that leads
3
to the misaligned trap. Remove r0+r1 from user-only ALL_QLDST_REGS;
4
I believe these had been reserved for bswap, which we no longer
5
perform during qemu_st.
1
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
tcg/arm/tcg-target-con-set.h | 16 ++++++++--------
11
tcg/arm/tcg-target-con-str.h | 5 ++---
12
tcg/arm/tcg-target.c.inc | 23 ++++++++---------------
13
3 files changed, 18 insertions(+), 26 deletions(-)
14
15
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/arm/tcg-target-con-set.h
18
+++ b/tcg/arm/tcg-target-con-set.h
19
@@ -XXX,XX +XXX,XX @@
20
C_O0_I1(r)
21
C_O0_I2(r, r)
22
C_O0_I2(r, rIN)
23
-C_O0_I2(s, s)
24
+C_O0_I2(q, q)
25
C_O0_I2(w, r)
26
-C_O0_I3(s, s, s)
27
-C_O0_I3(S, p, s)
28
+C_O0_I3(q, q, q)
29
+C_O0_I3(Q, p, q)
30
C_O0_I4(r, r, rI, rI)
31
-C_O0_I4(S, p, s, s)
32
-C_O1_I1(r, l)
33
+C_O0_I4(Q, p, q, q)
34
+C_O1_I1(r, q)
35
C_O1_I1(r, r)
36
C_O1_I1(w, r)
37
C_O1_I1(w, w)
38
C_O1_I1(w, wr)
39
C_O1_I2(r, 0, rZ)
40
-C_O1_I2(r, l, l)
41
+C_O1_I2(r, q, q)
42
C_O1_I2(r, r, r)
43
C_O1_I2(r, r, rI)
44
C_O1_I2(r, r, rIK)
45
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wZ)
46
C_O1_I3(w, w, w, w)
47
C_O1_I4(r, r, r, rI, rI)
48
C_O1_I4(r, r, rIN, rIK, 0)
49
-C_O2_I1(e, p, l)
50
-C_O2_I2(e, p, l, l)
51
+C_O2_I1(e, p, q)
52
+C_O2_I2(e, p, q, q)
53
C_O2_I2(r, r, r, r)
54
C_O2_I4(r, r, r, r, rIN, rIK)
55
C_O2_I4(r, r, rI, rI, rIN, rIK)
56
diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
57
index XXXXXXX..XXXXXXX 100644
58
--- a/tcg/arm/tcg-target-con-str.h
59
+++ b/tcg/arm/tcg-target-con-str.h
60
@@ -XXX,XX +XXX,XX @@
61
*/
62
REGS('e', ALL_GENERAL_REGS & 0x5555) /* even regs */
63
REGS('r', ALL_GENERAL_REGS)
64
-REGS('l', ALL_QLOAD_REGS)
65
-REGS('s', ALL_QSTORE_REGS)
66
-REGS('S', ALL_QSTORE_REGS & 0x5555) /* even qstore */
67
+REGS('q', ALL_QLDST_REGS)
68
+REGS('Q', ALL_QLDST_REGS & 0x5555) /* even qldst */
69
REGS('w', ALL_VECTOR_REGS)
70
71
/*
72
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
73
index XXXXXXX..XXXXXXX 100644
74
--- a/tcg/arm/tcg-target.c.inc
75
+++ b/tcg/arm/tcg-target.c.inc
76
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
77
#define ALL_VECTOR_REGS 0xffff0000u
78
79
/*
80
- * r0-r2 will be overwritten when reading the tlb entry (softmmu only)
81
- * and r0-r1 doing the byte swapping, so don't use these.
82
- * r3 is removed for softmmu to avoid clashes with helper arguments.
83
+ * r0-r3 will be overwritten when reading the tlb entry (softmmu only);
84
+ * r14 will be overwritten by the BLNE branching to the slow path.
85
*/
86
#ifdef CONFIG_SOFTMMU
87
-#define ALL_QLOAD_REGS \
88
+#define ALL_QLDST_REGS \
89
(ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
90
(1 << TCG_REG_R2) | (1 << TCG_REG_R3) | \
91
(1 << TCG_REG_R14)))
92
-#define ALL_QSTORE_REGS \
93
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
94
- (1 << TCG_REG_R2) | (1 << TCG_REG_R14) | \
95
- ((TARGET_LONG_BITS == 64) << TCG_REG_R3)))
96
#else
97
-#define ALL_QLOAD_REGS ALL_GENERAL_REGS
98
-#define ALL_QSTORE_REGS \
99
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1)))
100
+#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R14))
101
#endif
102
103
/*
104
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
105
return C_O1_I4(r, r, r, rI, rI);
106
107
case INDEX_op_qemu_ld_i32:
108
- return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
109
+ return TARGET_LONG_BITS == 32 ? C_O1_I1(r, q) : C_O1_I2(r, q, q);
110
case INDEX_op_qemu_ld_i64:
111
- return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, l) : C_O2_I2(e, p, l, l);
112
+ return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, q) : C_O2_I2(e, p, q, q);
113
case INDEX_op_qemu_st_i32:
114
- return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
115
+ return TARGET_LONG_BITS == 32 ? C_O0_I2(q, q) : C_O0_I3(q, q, q);
116
case INDEX_op_qemu_st_i64:
117
- return TARGET_LONG_BITS == 32 ? C_O0_I3(S, p, s) : C_O0_I4(S, p, s, s);
118
+ return TARGET_LONG_BITS == 32 ? C_O0_I3(Q, p, q) : C_O0_I4(Q, p, q, q);
119
120
case INDEX_op_st_vec:
121
return C_O0_I2(w, r);
122
--
123
2.34.1
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/arm/tcg-target.c.inc | 45 ----------------------------------------
9
1 file changed, 45 deletions(-)
10
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/arm/tcg-target.c.inc
14
+++ b/tcg/arm/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
bool index_scratch;
17
} HostAddress;
18
19
-#ifdef CONFIG_SOFTMMU
20
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
21
{
22
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
31
- return false;
32
- }
33
-
34
- if (TARGET_LONG_BITS == 64) {
35
- /* 64-bit target address is aligned into R2:R3. */
36
- TCGMovExtend ext[2] = {
37
- { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
38
- .src = l->addrlo_reg,
39
- .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
40
- { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
41
- .src = l->addrhi_reg,
42
- .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
43
- };
44
- tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
45
- } else {
46
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
47
- }
48
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0);
49
-
50
- /*
51
- * Tail call to the helper, with the return address back inline,
52
- * just for the clarity of the debugging traceback -- the helper
53
- * cannot return. We have used BLNE to arrive here, so LR is
54
- * already set.
55
- */
56
- tcg_out_goto(s, COND_AL, (const void *)
57
- (l->is_ld ? helper_unaligned_ld : helper_unaligned_st));
58
- return true;
59
-}
60
-
61
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
62
-{
63
- return tcg_out_fail_alignment(s, l);
64
-}
65
-
66
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
67
-{
68
- return tcg_out_fail_alignment(s, l);
69
-}
70
-#endif /* SOFTMMU */
71
72
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
73
TCGReg addrlo, TCGReg addrhi,
74
--
75
2.34.1
1
Map the stack executable if required by default or on demand.
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
2
4
3
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/elf.h | 1 +
8
tcg/mips/tcg-target.c.inc | 57 ++-------------------------------------
8
linux-user/qemu.h | 1 +
9
1 file changed, 2 insertions(+), 55 deletions(-)
9
linux-user/elfload.c | 19 ++++++++++++++++++-
10
3 files changed, 20 insertions(+), 1 deletion(-)
11
10
12
diff --git a/include/elf.h b/include/elf.h
11
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/include/elf.h
13
--- a/tcg/mips/tcg-target.c.inc
15
+++ b/include/elf.h
14
+++ b/tcg/mips/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ typedef int64_t Elf64_Sxword;
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
17
#define PT_LOPROC 0x70000000
16
tcg_out_nop(s);
18
#define PT_HIPROC 0x7fffffff
17
}
19
18
20
+#define PT_GNU_STACK (PT_LOOS + 0x474e551)
19
-#if defined(CONFIG_SOFTMMU)
21
#define PT_GNU_PROPERTY (PT_LOOS + 0x474e553)
20
/* We have four temps, we might as well expose three of them. */
22
21
static const TCGLdstHelperParam ldst_helper_param = {
23
#define PT_MIPS_REGINFO 0x70000000
22
.ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
24
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
25
index XXXXXXX..XXXXXXX 100644
24
26
--- a/linux-user/qemu.h
25
/* resolve label address */
27
+++ b/linux-user/qemu.h
26
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
28
@@ -XXX,XX +XXX,XX @@ struct image_info {
27
- || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
29
uint32_t elf_flags;
28
- && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
30
int personality;
29
+ || (l->label_ptr[1] && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
31
abi_ulong alignment;
30
return false;
32
+ bool exec_stack;
33
34
/* Generic semihosting knows about these pointers. */
35
abi_ulong arg_strings; /* strings for argv */
36
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/linux-user/elfload.c
39
+++ b/linux-user/elfload.c
40
@@ -XXX,XX +XXX,XX @@ static bool init_guest_commpage(void)
41
#define ELF_ARCH EM_386
42
43
#define ELF_PLATFORM get_elf_platform()
44
+#define EXSTACK_DEFAULT true
45
46
static const char *get_elf_platform(void)
47
{
48
@@ -XXX,XX +XXX,XX @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en
49
50
#define ELF_ARCH EM_ARM
51
#define ELF_CLASS ELFCLASS32
52
+#define EXSTACK_DEFAULT true
53
54
static inline void init_thread(struct target_pt_regs *regs,
55
struct image_info *infop)
56
@@ -XXX,XX +XXX,XX @@ static inline void init_thread(struct target_pt_regs *regs,
57
#else
58
59
#define ELF_CLASS ELFCLASS32
60
+#define EXSTACK_DEFAULT true
61
62
#endif
63
64
@@ -XXX,XX +XXX,XX @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en
65
66
#define ELF_CLASS ELFCLASS64
67
#define ELF_ARCH EM_LOONGARCH
68
+#define EXSTACK_DEFAULT true
69
70
#define elf_check_arch(x) ((x) == EM_LOONGARCH)
71
72
@@ -XXX,XX +XXX,XX @@ static uint32_t get_elf_hwcap(void)
73
#define ELF_CLASS ELFCLASS32
74
#endif
75
#define ELF_ARCH EM_MIPS
76
+#define EXSTACK_DEFAULT true
77
78
#ifdef TARGET_ABI_MIPSN32
79
#define elf_check_abi(x) ((x) & EF_MIPS_ABI2)
80
@@ -XXX,XX +XXX,XX @@ static inline void init_thread(struct target_pt_regs *regs,
81
#define bswaptls(ptr) bswap32s(ptr)
82
#endif
83
84
+#ifndef EXSTACK_DEFAULT
85
+#define EXSTACK_DEFAULT false
86
+#endif
87
+
88
#include "elf.h"
89
90
/* We must delay the following stanzas until after "elf.h". */
91
@@ -XXX,XX +XXX,XX @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm,
92
struct image_info *info)
93
{
94
abi_ulong size, error, guard;
95
+ int prot;
96
97
size = guest_stack_size;
98
if (size < STACK_LOWER_LIMIT) {
99
@@ -XXX,XX +XXX,XX @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm,
100
guard = qemu_real_host_page_size();
101
}
31
}
102
32
103
- error = target_mmap(0, size + guard, PROT_READ | PROT_WRITE,
33
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
104
+ prot = PROT_READ | PROT_WRITE;
34
105
+ if (info->exec_stack) {
35
/* resolve label address */
106
+ prot |= PROT_EXEC;
36
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
107
+ }
37
- || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
108
+ error = target_mmap(0, size + guard, prot,
38
- && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
109
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
39
+ || (l->label_ptr[1] && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
110
if (error == -1) {
40
return false;
111
perror("mmap stack");
112
@@ -XXX,XX +XXX,XX @@ static void load_elf_image(const char *image_name, int image_fd,
113
*/
114
loaddr = -1, hiaddr = 0;
115
info->alignment = 0;
116
+ info->exec_stack = EXSTACK_DEFAULT;
117
for (i = 0; i < ehdr->e_phnum; ++i) {
118
struct elf_phdr *eppnt = phdr + i;
119
if (eppnt->p_type == PT_LOAD) {
120
@@ -XXX,XX +XXX,XX @@ static void load_elf_image(const char *image_name, int image_fd,
121
if (!parse_elf_properties(image_fd, info, eppnt, bprm_buf, &err)) {
122
goto exit_errmsg;
123
}
124
+ } else if (eppnt->p_type == PT_GNU_STACK) {
125
+ info->exec_stack = eppnt->p_flags & PF_X;
126
}
127
}
41
}
128
42
43
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
44
return true;
45
}
46
47
-#else
48
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
49
-{
50
- void *target;
51
-
52
- if (!reloc_pc16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
53
- return false;
54
- }
55
-
56
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
57
- /* A0 is env, A1 is skipped, A2:A3 is the uint64_t address. */
58
- TCGReg a2 = MIPS_BE ? l->addrhi_reg : l->addrlo_reg;
59
- TCGReg a3 = MIPS_BE ? l->addrlo_reg : l->addrhi_reg;
60
-
61
- if (a3 != TCG_REG_A2) {
62
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2);
63
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3);
64
- } else if (a2 != TCG_REG_A3) {
65
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3);
66
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2);
67
- } else {
68
- tcg_out_mov(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A2);
69
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, TCG_REG_A3);
70
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, TCG_TMP0);
71
- }
72
- } else {
73
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
74
- }
75
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
76
-
77
- /*
78
- * Tail call to the helper, with the return address back inline.
79
- * We have arrived here via BNEL, so $31 is already set.
80
- */
81
- target = (l->is_ld ? helper_unaligned_ld : helper_unaligned_st);
82
- tcg_out_call_int(s, target, true);
83
- return true;
84
-}
85
-
86
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
87
-{
88
- return tcg_out_fail_alignment(s, l);
89
-}
90
-
91
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
92
-{
93
- return tcg_out_fail_alignment(s, l);
94
-}
95
-#endif /* SOFTMMU */
96
-
97
typedef struct {
98
TCGReg base;
99
MemOp align;
129
--
2.34.1
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/s390x/tcg-target.c.inc | 29 -----------------------------
9
1 file changed, 29 deletions(-)
10
11
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/s390x/tcg-target.c.inc
14
+++ b/tcg/s390x/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
16
}
17
}
18
19
-#if defined(CONFIG_SOFTMMU)
20
static const TCGLdstHelperParam ldst_helper_param = {
21
.ntmp = 1, .tmp = { TCG_TMP0 }
22
};
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
31
- (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
32
- return false;
33
- }
34
-
35
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
37
-
38
- /* "Tail call" to the helper, with the return address back inline. */
39
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
40
- tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
41
- : helper_unaligned_st));
42
- return true;
43
-}
44
-
45
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
-{
47
- return tcg_out_fail_alignment(s, l);
48
-}
49
-
50
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
-{
52
- return tcg_out_fail_alignment(s, l);
53
-}
54
-#endif /* CONFIG_SOFTMMU */
55
56
/*
57
* For softmmu, perform the TLB load and compare.
58
--
59
2.34.1
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/sparc64/tcg-target.c.inc | 15 +++++++--------
5
1 file changed, 7 insertions(+), 8 deletions(-)
1
6
7
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/sparc64/tcg-target.c.inc
10
+++ b/tcg/sparc64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
12
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
13
#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
14
15
-/* Define some temporary registers. T2 is used for constant generation. */
16
+/* Define some temporary registers. T3 is used for constant generation. */
17
#define TCG_REG_T1 TCG_REG_G1
18
-#define TCG_REG_T2 TCG_REG_O7
19
+#define TCG_REG_T2 TCG_REG_G2
20
+#define TCG_REG_T3 TCG_REG_O7
21
22
#ifndef CONFIG_SOFTMMU
23
# define TCG_GUEST_BASE_REG TCG_REG_I5
24
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
25
TCG_REG_I4,
26
TCG_REG_I5,
27
28
- TCG_REG_G2,
29
TCG_REG_G3,
30
TCG_REG_G4,
31
TCG_REG_G5,
32
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
33
static void tcg_out_movi(TCGContext *s, TCGType type,
34
TCGReg ret, tcg_target_long arg)
35
{
36
- tcg_debug_assert(ret != TCG_REG_T2);
37
- tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2);
38
+ tcg_debug_assert(ret != TCG_REG_T3);
39
+ tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T3);
40
}
41
42
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs)
43
@@ -XXX,XX +XXX,XX @@ static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest,
44
{
45
uintptr_t desti = (uintptr_t)dest;
46
47
- /* Be careful not to clobber %o7 for a tail call. */
48
tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1,
49
- desti & ~0xfff, in_prologue,
50
- tail_call ? TCG_REG_G2 : TCG_REG_O7);
51
+ desti & ~0xfff, in_prologue, TCG_REG_T2);
52
tcg_out_arithi(s, tail_call ? TCG_REG_G0 : TCG_REG_O7,
53
TCG_REG_T1, desti & 0xfff, JMPL);
54
}
55
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
56
tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
57
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
58
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
59
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_T3); /* for internal use */
60
}
61
62
#define ELF_HOST_MACHINE EM_SPARCV9
63
--
64
2.34.1
1
Emphasize that the constant is signed.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/sparc64/tcg-target.c.inc | 21 +++++++++++----------
7
1 file changed, 11 insertions(+), 10 deletions(-)
8
9
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/sparc64/tcg-target.c.inc
12
+++ b/tcg/sparc64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
14
tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
15
}
16
17
-static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
18
+/* A 13-bit constant sign-extended to 64 bits. */
19
+static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
20
{
21
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
22
}
23
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
24
{
25
if (check_fit_i32(arg, 13)) {
26
/* A 13-bit constant sign-extended to 64-bits. */
27
- tcg_out_movi_imm13(s, ret, arg);
28
+ tcg_out_movi_s13(s, ret, arg);
29
} else {
30
/* A 32-bit constant zero-extended to 64 bits. */
31
tcg_out_sethi(s, ret, arg);
32
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
33
34
/* A 13-bit constant sign-extended to 64-bits. */
35
if (check_fit_tl(arg, 13)) {
36
- tcg_out_movi_imm13(s, ret, arg);
37
+ tcg_out_movi_s13(s, ret, arg);
38
return;
39
}
40
41
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
42
43
default:
44
tcg_out_cmp(s, c1, c2, c2const);
45
- tcg_out_movi_imm13(s, ret, 0);
46
+ tcg_out_movi_s13(s, ret, 0);
47
tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
48
return;
49
}
50
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
51
/* For 64-bit signed comparisons vs zero, we can avoid the compare
52
if the input does not overlap the output. */
53
if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
54
- tcg_out_movi_imm13(s, ret, 0);
55
+ tcg_out_movi_s13(s, ret, 0);
56
tcg_out_movr(s, cond, ret, c1, 1, 1);
57
} else {
58
tcg_out_cmp(s, c1, c2, c2const);
59
- tcg_out_movi_imm13(s, ret, 0);
60
+ tcg_out_movi_s13(s, ret, 0);
61
tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
62
}
63
}
64
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
65
if (use_vis3_instructions && !is_sub) {
66
/* Note that ADDXC doesn't accept immediates. */
67
if (bhconst && bh != 0) {
68
- tcg_out_movi_imm13(s, TCG_REG_T2, bh);
69
+ tcg_out_movi_s13(s, TCG_REG_T2, bh);
70
bh = TCG_REG_T2;
71
}
72
tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
73
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
74
* so the adjustment fits 12 bits.
75
*/
76
if (bhconst) {
77
- tcg_out_movi_imm13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1));
78
+ tcg_out_movi_s13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1));
79
} else {
80
tcg_out_arithi(s, TCG_REG_T2, bh, 1,
81
is_sub ? ARITH_SUB : ARITH_ADD);
82
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
83
tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
84
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
85
/* delay slot */
86
- tcg_out_movi_imm13(s, TCG_REG_O0, 0);
87
+ tcg_out_movi_s13(s, TCG_REG_O0, 0);
88
89
build_trampolines(s);
90
}
91
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
92
{
93
if (check_fit_ptr(a0, 13)) {
94
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
95
- tcg_out_movi_imm13(s, TCG_REG_O0, a0);
96
+ tcg_out_movi_s13(s, TCG_REG_O0, a0);
97
return;
98
} else {
99
intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0);
100
--
101
2.34.1
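As an aside, the s13 naming matches the SPARC immediate format: a 13-bit signed field covers -4096..4095. A minimal standalone sketch of that fit check (illustration only, not part of the patch; fits_simm13 is a made-up name):

#include <stdbool.h>
#include <stdint.h>

/* Illustration: does 'val' fit the 13-bit signed immediate field? */
static bool fits_simm13(int64_t val)
{
    return val >= -4096 && val <= 4095;   /* [-2^12, 2^12 - 1] */
}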
New patch
1
Shuffle the order in tcg_out_movi_int to check s13 first, and
2
drop this check from tcg_out_movi_imm32. This might make the
3
sequence for in_prologue larger, but that is not worth worrying about.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/sparc64/tcg-target.c.inc | 25 ++++++++++---------------
9
1 file changed, 10 insertions(+), 15 deletions(-)
10
11
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/sparc64/tcg-target.c.inc
14
+++ b/tcg/sparc64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
16
17
static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
18
{
19
- if (check_fit_i32(arg, 13)) {
20
- /* A 13-bit constant sign-extended to 64-bits. */
21
- tcg_out_movi_s13(s, ret, arg);
22
- } else {
23
- /* A 32-bit constant zero-extended to 64 bits. */
24
- tcg_out_sethi(s, ret, arg);
25
- if (arg & 0x3ff) {
26
- tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
27
- }
28
+ /* A 32-bit constant zero-extended to 64 bits. */
29
+ tcg_out_sethi(s, ret, arg);
30
+ if (arg & 0x3ff) {
31
+ tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
32
}
33
}
34
35
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
36
tcg_target_long hi, lo = (int32_t)arg;
37
tcg_target_long test, lsb;
38
39
- /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
40
- if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
41
- tcg_out_movi_imm32(s, ret, arg);
42
- return;
43
- }
44
-
45
/* A 13-bit constant sign-extended to 64-bits. */
46
if (check_fit_tl(arg, 13)) {
47
tcg_out_movi_s13(s, ret, arg);
48
return;
49
}
50
51
+ /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
52
+ if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
53
+ tcg_out_movi_imm32(s, ret, arg);
54
+ return;
55
+ }
56
+
57
/* A 13-bit constant relative to the TB. */
58
if (!in_prologue) {
59
test = tcg_tbrel_diff(s, (void *)arg);
60
--
61
2.34.1
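For readers skimming the series, a hedged sketch of the selection order inside tcg_out_movi_int after this change, with all emission calls elided (movi_order_sketch is invented for illustration):

#include <stdint.h>

/* Ordering sketch only; the TB-relative and 64-bit decomposition
 * cases from the diff are omitted. */
static void movi_order_sketch(int type_is_i32, int64_t arg)
{
    if (arg >= -4096 && arg <= 4095) {
        /* 1. 13-bit signed immediate: a single OR with %g0. */
    } else if (type_is_i32 || arg == (int64_t)(uint32_t)arg) {
        /* 2. 32-bit value zero-extended to 64 bits: SETHI + optional OR. */
    } else {
        /* 3. Everything else: TB-relative offset or two 32-bit pieces. */
    }
}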
New patch
1
Emphasize that the constant is unsigned.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/sparc64/tcg-target.c.inc | 12 ++++++------
7
1 file changed, 6 insertions(+), 6 deletions(-)
8
9
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/sparc64/tcg-target.c.inc
12
+++ b/tcg/sparc64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
14
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
15
}
16
17
-static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
18
+/* A 32-bit constant zero-extended to 64 bits. */
19
+static void tcg_out_movi_u32(TCGContext *s, TCGReg ret, uint32_t arg)
20
{
21
- /* A 32-bit constant zero-extended to 64 bits. */
22
tcg_out_sethi(s, ret, arg);
23
if (arg & 0x3ff) {
24
tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
26
27
/* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
28
if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
29
- tcg_out_movi_imm32(s, ret, arg);
30
+ tcg_out_movi_u32(s, ret, arg);
31
return;
32
}
33
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
35
/* A 64-bit constant decomposed into 2 32-bit pieces. */
36
if (check_fit_i32(lo, 13)) {
37
hi = (arg - lo) >> 32;
38
- tcg_out_movi_imm32(s, ret, hi);
39
+ tcg_out_movi_u32(s, ret, hi);
40
tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
41
tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
42
} else {
43
hi = arg >> 32;
44
- tcg_out_movi_imm32(s, ret, hi);
45
- tcg_out_movi_imm32(s, scratch, lo);
46
+ tcg_out_movi_u32(s, ret, hi);
47
+ tcg_out_movi_u32(s, scratch, lo);
48
tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
49
tcg_out_arith(s, ret, ret, scratch, ARITH_OR);
50
}
51
--
52
2.34.1
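A worked example of the zero-extended path, restated outside of TCG: SETHI materialises bits 31..10 and clears everything else, so only a non-zero low 10 bits needs the follow-up OR (sketch only; build_u32_sketch is a made-up name):

#include <stdint.h>

/* How a 32-bit constant is assembled by SETHI + OR, zero-extended. */
static uint64_t build_u32_sketch(uint32_t arg)
{
    uint64_t ret = arg & 0xfffffc00u;   /* sethi %hi(arg): bits 31..10 */
    if (arg & 0x3ff) {
        ret |= arg & 0x3ff;             /* or ret, %lo(arg), ret */
    }
    return ret;
}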
1
We're about to start validating PAGE_EXEC, which means that we've
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
got to mark page zero executable. We had been special-casing this
3
entirely within translate.
4
5
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
3
---
9
linux-user/elfload.c | 34 +++++++++++++++++++++++++++++++---
4
tcg/sparc64/tcg-target.c.inc | 10 ++++++++--
10
1 file changed, 31 insertions(+), 3 deletions(-)
5
1 file changed, 8 insertions(+), 2 deletions(-)
11
6
12
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
7
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
14
--- a/linux-user/elfload.c
9
--- a/tcg/sparc64/tcg-target.c.inc
15
+++ b/linux-user/elfload.c
10
+++ b/tcg/sparc64/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static inline void init_thread(struct target_pt_regs *regs,
11
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
17
regs->gr[31] = infop->entry;
12
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
18
}
13
}
19
14
20
+#define LO_COMMPAGE 0
15
+/* A 32-bit constant sign-extended to 64 bits. */
21
+
16
+static void tcg_out_movi_s32(TCGContext *s, TCGReg ret, int32_t arg)
22
+static bool init_guest_commpage(void)
23
+{
17
+{
24
+ void *want = g2h_untagged(LO_COMMPAGE);
18
+ tcg_out_sethi(s, ret, ~arg);
25
+ void *addr = mmap(want, qemu_host_page_size, PROT_NONE,
19
+ tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
26
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
27
+
28
+ if (addr == MAP_FAILED) {
29
+ perror("Allocating guest commpage");
30
+ exit(EXIT_FAILURE);
31
+ }
32
+ if (addr != want) {
33
+ return false;
34
+ }
35
+
36
+ /*
37
+ * On Linux, page zero is normally marked execute only + gateway.
38
+ * Normal read or write is supposed to fail (thus PROT_NONE above),
39
+ * but specific offsets have kernel code mapped to raise permissions
40
+ * and implement syscalls. Here, simply mark the page executable.
41
+ * Special case the entry points during translation (see do_page_zero).
42
+ */
43
+ page_set_flags(LO_COMMPAGE, LO_COMMPAGE + TARGET_PAGE_SIZE,
44
+ PAGE_EXEC | PAGE_VALID);
45
+ return true;
46
+}
20
+}
47
+
21
+
48
#endif /* TARGET_HPPA */
22
/* A 32-bit constant zero-extended to 64 bits. */
49
23
static void tcg_out_movi_u32(TCGContext *s, TCGReg ret, uint32_t arg)
50
#ifdef TARGET_XTENSA
24
{
51
@@ -XXX,XX +XXX,XX @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
52
}
26
53
27
/* A 32-bit constant sign-extended to 64-bits. */
54
#if defined(HI_COMMPAGE)
28
if (arg == lo) {
55
-#define LO_COMMPAGE 0
29
- tcg_out_sethi(s, ret, ~arg);
56
+#define LO_COMMPAGE -1
30
- tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
57
#elif defined(LO_COMMPAGE)
31
+ tcg_out_movi_s32(s, ret, arg);
58
#define HI_COMMPAGE 0
32
return;
59
#else
60
#define HI_COMMPAGE 0
61
-#define LO_COMMPAGE 0
62
+#define LO_COMMPAGE -1
63
#define init_guest_commpage() true
64
#endif
65
66
@@ -XXX,XX +XXX,XX @@ static void pgb_static(const char *image_name, abi_ulong orig_loaddr,
67
} else {
68
offset = -(HI_COMMPAGE & -align);
69
}
70
- } else if (LO_COMMPAGE != 0) {
71
+ } else if (LO_COMMPAGE != -1) {
72
loaddr = MIN(loaddr, LO_COMMPAGE & -align);
73
}
33
}
74
34
75
--
35
--
76
2.34.1
36
2.34.1
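The new tcg_out_movi_s32 builds a sign-extended 32-bit constant from SETHI on the complement plus an XOR with a negative 13-bit immediate. A hedged sketch of why that works (only negative values reach this path, since non-negative ones were already handled as zero-extended 32-bit constants; build_s32_sketch is a made-up name):

#include <stdint.h>

/* Sketch of the SETHI(~arg) + XOR trick. */
static uint64_t build_s32_sketch(int32_t arg)
{
    /* sethi %hi(~arg): upper 32 bits zero, bits 31..10 of ~arg. */
    uint64_t ret = (uint32_t)~arg & 0xfffffc00u;
    /* 13-bit immediate, sign-extended: high bits all ones, low 10 from arg. */
    uint64_t imm = (uint64_t)(int64_t)((arg & 0x3ff) | -0x400);
    /* For negative arg the XOR flips bits 63..32 and 31..10 into place. */
    return ret ^ imm;
}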
1
The function is not used outside of cpu-exec.c. Move it and
1
Drop the target-specific trampolines for the standard slow path.
2
its subroutines up in the file, before the first use.
2
This lets us use tcg_out_helper_{ld,st}_args, and handles the new
3
atomicity bits within MemOp.
3
4
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
At the same time, use the full load/store helpers for user-only mode.
5
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Drop inline unaligned access support for user-only mode, as it does
6
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
7
not handle atomicity.
8
9
Use TCG_REG_T[1-3] in the tlb lookup, instead of TCG_REG_O[0-2].
10
This allows the constraints to be simplified.
11
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
14
---
9
include/exec/exec-all.h | 3 -
15
tcg/sparc64/tcg-target-con-set.h | 2 -
10
accel/tcg/cpu-exec.c | 122 ++++++++++++++++++++--------------------
16
tcg/sparc64/tcg-target-con-str.h | 1 -
11
2 files changed, 61 insertions(+), 64 deletions(-)
17
tcg/sparc64/tcg-target.h | 1 +
18
tcg/sparc64/tcg-target.c.inc | 610 +++++++++----------------------
19
4 files changed, 182 insertions(+), 432 deletions(-)
12
20
13
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
21
diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h
14
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
15
--- a/include/exec/exec-all.h
23
--- a/tcg/sparc64/tcg-target-con-set.h
16
+++ b/include/exec/exec-all.h
24
+++ b/tcg/sparc64/tcg-target-con-set.h
17
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs);
25
@@ -XXX,XX +XXX,XX @@
26
C_O0_I1(r)
27
C_O0_I2(rZ, r)
28
C_O0_I2(rZ, rJ)
29
-C_O0_I2(sZ, s)
30
-C_O1_I1(r, s)
31
C_O1_I1(r, r)
32
C_O1_I2(r, r, r)
33
C_O1_I2(r, rZ, rJ)
34
diff --git a/tcg/sparc64/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/tcg/sparc64/tcg-target-con-str.h
37
+++ b/tcg/sparc64/tcg-target-con-str.h
38
@@ -XXX,XX +XXX,XX @@
39
* REGS(letter, register_mask)
40
*/
41
REGS('r', ALL_GENERAL_REGS)
42
-REGS('s', ALL_QLDST_REGS)
43
44
/*
45
* Define constraint letters for constants:
46
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
47
index XXXXXXX..XXXXXXX 100644
48
--- a/tcg/sparc64/tcg-target.h
49
+++ b/tcg/sparc64/tcg-target.h
50
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
51
52
#define TCG_TARGET_DEFAULT_MO (0)
53
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
54
+#define TCG_TARGET_NEED_LDST_LABELS
55
#define TCG_TARGET_NEED_POOL_LABELS
56
18
#endif
57
#endif
19
void tb_flush(CPUState *cpu);
58
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
20
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
21
-TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
22
- target_ulong cs_base, uint32_t flags,
23
- uint32_t cflags);
24
void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
25
26
/* GETPC is the true target of the return instruction that we'll execute. */
27
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
28
index XXXXXXX..XXXXXXX 100644
59
index XXXXXXX..XXXXXXX 100644
29
--- a/accel/tcg/cpu-exec.c
60
--- a/tcg/sparc64/tcg-target.c.inc
30
+++ b/accel/tcg/cpu-exec.c
61
+++ b/tcg/sparc64/tcg-target.c.inc
31
@@ -XXX,XX +XXX,XX @@ uint32_t curr_cflags(CPUState *cpu)
62
@@ -XXX,XX +XXX,XX @@
32
return cflags;
63
#error "unsupported code generation mode"
64
#endif
65
66
+#include "../tcg-ldst.c.inc"
67
#include "../tcg-pool.c.inc"
68
69
#ifdef CONFIG_DEBUG_TCG
70
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
71
#define TCG_CT_CONST_S13 0x200
72
#define TCG_CT_CONST_ZERO 0x400
73
74
-/*
75
- * For softmmu, we need to avoid conflicts with the first 3
76
- * argument registers to perform the tlb lookup, and to call
77
- * the helper function.
78
- */
79
-#ifdef CONFIG_SOFTMMU
80
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_O0, 3)
81
-#else
82
-#define SOFTMMU_RESERVE_REGS 0
83
-#endif
84
-#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
85
-#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
86
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
87
88
/* Define some temporary registers. T3 is used for constant generation. */
89
#define TCG_REG_T1 TCG_REG_G1
90
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
91
tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL));
33
}
92
}
34
93
35
+struct tb_desc {
94
-#ifdef CONFIG_SOFTMMU
36
+ target_ulong pc;
95
-static const tcg_insn_unit *qemu_ld_trampoline[MO_SSIZE + 1];
37
+ target_ulong cs_base;
96
-static const tcg_insn_unit *qemu_st_trampoline[MO_SIZE + 1];
38
+ CPUArchState *env;
97
-
39
+ tb_page_addr_t phys_page1;
98
-static void build_trampolines(TCGContext *s)
40
+ uint32_t flags;
99
-{
41
+ uint32_t cflags;
100
- int i;
42
+ uint32_t trace_vcpu_dstate;
101
-
102
- for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
103
- if (qemu_ld_helpers[i] == NULL) {
104
- continue;
105
- }
106
-
107
- /* May as well align the trampoline. */
108
- while ((uintptr_t)s->code_ptr & 15) {
109
- tcg_out_nop(s);
110
- }
111
- qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
112
-
113
- /* Set the retaddr operand. */
114
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7);
115
- /* Tail call. */
116
- tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true);
117
- /* delay slot -- set the env argument */
118
- tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
119
- }
120
-
121
- for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) {
122
- if (qemu_st_helpers[i] == NULL) {
123
- continue;
124
- }
125
-
126
- /* May as well align the trampoline. */
127
- while ((uintptr_t)s->code_ptr & 15) {
128
- tcg_out_nop(s);
129
- }
130
- qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
131
-
132
- /* Set the retaddr operand. */
133
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7);
134
-
135
- /* Tail call. */
136
- tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true);
137
- /* delay slot -- set the env argument */
138
- tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
139
- }
140
-}
141
-#else
142
-static const tcg_insn_unit *qemu_unalign_ld_trampoline;
143
-static const tcg_insn_unit *qemu_unalign_st_trampoline;
144
-
145
-static void build_trampolines(TCGContext *s)
146
-{
147
- for (int ld = 0; ld < 2; ++ld) {
148
- void *helper;
149
-
150
- while ((uintptr_t)s->code_ptr & 15) {
151
- tcg_out_nop(s);
152
- }
153
-
154
- if (ld) {
155
- helper = helper_unaligned_ld;
156
- qemu_unalign_ld_trampoline = tcg_splitwx_to_rx(s->code_ptr);
157
- } else {
158
- helper = helper_unaligned_st;
159
- qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr);
160
- }
161
-
162
- /* Tail call. */
163
- tcg_out_jmpl_const(s, helper, true, true);
164
- /* delay slot -- set the env argument */
165
- tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
166
- }
167
-}
168
-#endif
169
-
170
/* Generate global QEMU prologue and epilogue code */
171
static void tcg_target_qemu_prologue(TCGContext *s)
172
{
173
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
174
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
175
/* delay slot */
176
tcg_out_movi_s13(s, TCG_REG_O0, 0);
177
-
178
- build_trampolines(s);
179
}
180
181
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
182
@@ -XXX,XX +XXX,XX @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
183
}
184
}
185
186
-#if defined(CONFIG_SOFTMMU)
187
+static const TCGLdstHelperParam ldst_helper_param = {
188
+ .ntmp = 1, .tmp = { TCG_REG_T1 }
43
+};
189
+};
44
+
190
45
+static bool tb_lookup_cmp(const void *p, const void *d)
191
-/* We expect to use a 13-bit negative offset from ENV. */
192
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
193
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
194
-
195
-/* Perform the TLB load and compare.
196
-
197
- Inputs:
198
- ADDRLO and ADDRHI contain the possible two parts of the address.
199
-
200
- MEM_INDEX and S_BITS are the memory context and log2 size of the load.
201
-
202
- WHICH is the offset into the CPUTLBEntry structure of the slot to read.
203
- This should be offsetof addr_read or addr_write.
204
-
205
- The result of the TLB comparison is in %[ix]cc. The sanitized address
206
- is in the returned register, maybe %o0. The TLB addend is in %o1. */
207
-
208
-static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
209
- MemOp opc, int which)
210
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
211
{
212
+ MemOp opc = get_memop(lb->oi);
213
+ MemOp sgn;
214
+
215
+ if (!patch_reloc(lb->label_ptr[0], R_SPARC_WDISP19,
216
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 0)) {
217
+ return false;
218
+ }
219
+
220
+ /* Use inline tcg_out_ext32s; otherwise let the helper sign-extend. */
221
+ sgn = (opc & MO_SIZE) < MO_32 ? MO_SIGN : 0;
222
+
223
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
224
+ tcg_out_call(s, qemu_ld_helpers[opc & (MO_SIZE | sgn)], NULL);
225
+ tcg_out_ld_helper_ret(s, lb, sgn, &ldst_helper_param);
226
+
227
+ tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
228
+ return patch_reloc(s->code_ptr - 1, R_SPARC_WDISP19,
229
+ (intptr_t)lb->raddr, 0);
230
+}
231
+
232
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
46
+{
233
+{
47
+ const TranslationBlock *tb = p;
234
+ MemOp opc = get_memop(lb->oi);
48
+ const struct tb_desc *desc = d;
235
+
49
+
236
+ if (!patch_reloc(lb->label_ptr[0], R_SPARC_WDISP19,
50
+ if (tb->pc == desc->pc &&
237
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 0)) {
51
+ tb->page_addr[0] == desc->phys_page1 &&
238
+ return false;
52
+ tb->cs_base == desc->cs_base &&
239
+ }
53
+ tb->flags == desc->flags &&
240
+
54
+ tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
241
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
55
+ tb_cflags(tb) == desc->cflags) {
242
+ tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE], NULL);
56
+ /* check next page if needed */
243
+
57
+ if (tb->page_addr[1] == -1) {
244
+ tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
58
+ return true;
245
+ return patch_reloc(s->code_ptr - 1, R_SPARC_WDISP19,
59
+ } else {
246
+ (intptr_t)lb->raddr, 0);
60
+ tb_page_addr_t phys_page2;
247
+}
61
+ target_ulong virt_page2;
248
+
62
+
249
+typedef struct {
63
+ virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
250
+ TCGReg base;
64
+ phys_page2 = get_page_addr_code(desc->env, virt_page2);
251
+ TCGReg index;
65
+ if (tb->page_addr[1] == phys_page2) {
252
+} HostAddress;
66
+ return true;
253
+
67
+ }
254
+/*
255
+ * For softmmu, perform the TLB load and compare.
256
+ * For useronly, perform any required alignment tests.
257
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
258
+ * is required and fill in @h with the host address for the fast path.
259
+ */
260
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
261
+ TCGReg addr_reg, MemOpIdx oi,
262
+ bool is_ld)
263
+{
264
+ TCGLabelQemuLdst *ldst = NULL;
265
+ MemOp opc = get_memop(oi);
266
+ unsigned a_bits = get_alignment_bits(opc);
267
+ unsigned s_bits = opc & MO_SIZE;
268
+ unsigned a_mask;
269
+
270
+ /* We don't support unaligned accesses. */
271
+ a_bits = MAX(a_bits, s_bits);
272
+ a_mask = (1u << a_bits) - 1;
273
+
274
+#ifdef CONFIG_SOFTMMU
275
+ int mem_index = get_mmuidx(oi);
276
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
277
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
278
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
279
- const TCGReg r0 = TCG_REG_O0;
280
- const TCGReg r1 = TCG_REG_O1;
281
- const TCGReg r2 = TCG_REG_O2;
282
- unsigned s_bits = opc & MO_SIZE;
283
- unsigned a_bits = get_alignment_bits(opc);
284
- tcg_target_long compare_mask;
285
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
286
+ : offsetof(CPUTLBEntry, addr_write);
287
+ int add_off = offsetof(CPUTLBEntry, addend);
288
+ int compare_mask;
289
+ int cc;
290
291
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
292
- tcg_out_ld(s, TCG_TYPE_PTR, r0, TCG_AREG0, mask_off);
293
- tcg_out_ld(s, TCG_TYPE_PTR, r1, TCG_AREG0, table_off);
294
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
295
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
296
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T2, TCG_AREG0, mask_off);
297
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T3, TCG_AREG0, table_off);
298
299
/* Extract the page index, shifted into place for tlb index. */
300
- tcg_out_arithi(s, r2, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
301
- SHIFT_SRL);
302
- tcg_out_arith(s, r2, r2, r0, ARITH_AND);
303
+ tcg_out_arithi(s, TCG_REG_T1, addr_reg,
304
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
305
+ tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T2, ARITH_AND);
306
307
/* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */
308
- tcg_out_arith(s, r2, r2, r1, ARITH_ADD);
309
+ tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T3, ARITH_ADD);
310
311
- /* Load the tlb comparator and the addend. */
312
- tcg_out_ld(s, TCG_TYPE_TL, r0, r2, which);
313
- tcg_out_ld(s, TCG_TYPE_PTR, r1, r2, offsetof(CPUTLBEntry, addend));
314
+ /* Load the tlb comparator and the addend. */
315
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_T2, TCG_REG_T1, cmp_off);
316
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T1, TCG_REG_T1, add_off);
317
+ h->base = TCG_REG_T1;
318
319
- /* Mask out the page offset, except for the required alignment.
320
- We don't support unaligned accesses. */
321
- if (a_bits < s_bits) {
322
- a_bits = s_bits;
323
- }
324
- compare_mask = (tcg_target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
325
+ /* Mask out the page offset, except for the required alignment. */
326
+ compare_mask = TARGET_PAGE_MASK | a_mask;
327
if (check_fit_tl(compare_mask, 13)) {
328
- tcg_out_arithi(s, r2, addr, compare_mask, ARITH_AND);
329
+ tcg_out_arithi(s, TCG_REG_T3, addr_reg, compare_mask, ARITH_AND);
330
} else {
331
- tcg_out_movi(s, TCG_TYPE_TL, r2, compare_mask);
332
- tcg_out_arith(s, r2, addr, r2, ARITH_AND);
333
+ tcg_out_movi_s32(s, TCG_REG_T3, compare_mask);
334
+ tcg_out_arith(s, TCG_REG_T3, addr_reg, TCG_REG_T3, ARITH_AND);
335
}
336
- tcg_out_cmp(s, r0, r2, 0);
337
+ tcg_out_cmp(s, TCG_REG_T2, TCG_REG_T3, 0);
338
339
- /* If the guest address must be zero-extended, do so now. */
340
+ ldst = new_ldst_label(s);
341
+ ldst->is_ld = is_ld;
342
+ ldst->oi = oi;
343
+ ldst->addrlo_reg = addr_reg;
344
+ ldst->label_ptr[0] = s->code_ptr;
345
+
346
+ /* bne,pn %[xi]cc, label0 */
347
+ cc = TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC;
348
+ tcg_out_bpcc0(s, COND_NE, BPCC_PN | cc, 0);
349
+#else
350
+ if (a_bits != s_bits) {
351
+ /*
352
+ * Test for at least natural alignment, and defer
353
+ * everything else to the helper functions.
354
+ */
355
+ tcg_debug_assert(check_fit_tl(a_mask, 13));
356
+ tcg_out_arithi(s, TCG_REG_G0, addr_reg, a_mask, ARITH_ANDCC);
357
+
358
+ ldst = new_ldst_label(s);
359
+ ldst->is_ld = is_ld;
360
+ ldst->oi = oi;
361
+ ldst->addrlo_reg = addr_reg;
362
+ ldst->label_ptr[0] = s->code_ptr;
363
+
364
+ /* bne,pn %icc, label0 */
365
+ tcg_out_bpcc0(s, COND_NE, BPCC_PN | BPCC_ICC, 0);
366
+ }
367
+ h->base = guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0;
368
+#endif
369
+
370
+ /* If the guest address must be zero-extended, do in the delay slot. */
371
if (TARGET_LONG_BITS == 32) {
372
- tcg_out_ext32u(s, r0, addr);
373
- return r0;
374
+ tcg_out_ext32u(s, TCG_REG_T2, addr_reg);
375
+ h->index = TCG_REG_T2;
376
+ } else {
377
+ if (ldst) {
378
+ tcg_out_nop(s);
68
+ }
379
+ }
69
+ }
380
+ h->index = addr_reg;
70
+ return false;
381
}
71
+}
382
- return addr;
72
+
383
+ return ldst;
73
+static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
74
+ target_ulong cs_base, uint32_t flags,
75
+ uint32_t cflags)
76
+{
77
+ tb_page_addr_t phys_pc;
78
+ struct tb_desc desc;
79
+ uint32_t h;
80
+
81
+ desc.env = cpu->env_ptr;
82
+ desc.cs_base = cs_base;
83
+ desc.flags = flags;
84
+ desc.cflags = cflags;
85
+ desc.trace_vcpu_dstate = *cpu->trace_dstate;
86
+ desc.pc = pc;
87
+ phys_pc = get_page_addr_code(desc.env, pc);
88
+ if (phys_pc == -1) {
89
+ return NULL;
90
+ }
91
+ desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
92
+ h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
93
+ return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
94
+}
95
+
96
/* Might cause an exception, so have a longjmp destination ready */
97
static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
98
target_ulong cs_base,
99
@@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu)
100
end_exclusive();
101
}
384
}
102
385
-#endif /* CONFIG_SOFTMMU */
103
-struct tb_desc {
386
-
104
- target_ulong pc;
387
-static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
105
- target_ulong cs_base;
388
- [MO_UB] = LDUB,
106
- CPUArchState *env;
389
- [MO_SB] = LDSB,
107
- tb_page_addr_t phys_page1;
390
- [MO_UB | MO_LE] = LDUB,
108
- uint32_t flags;
391
- [MO_SB | MO_LE] = LDSB,
109
- uint32_t cflags;
392
-
110
- uint32_t trace_vcpu_dstate;
393
- [MO_BEUW] = LDUH,
394
- [MO_BESW] = LDSH,
395
- [MO_BEUL] = LDUW,
396
- [MO_BESL] = LDSW,
397
- [MO_BEUQ] = LDX,
398
- [MO_BESQ] = LDX,
399
-
400
- [MO_LEUW] = LDUH_LE,
401
- [MO_LESW] = LDSH_LE,
402
- [MO_LEUL] = LDUW_LE,
403
- [MO_LESL] = LDSW_LE,
404
- [MO_LEUQ] = LDX_LE,
405
- [MO_LESQ] = LDX_LE,
111
-};
406
-};
112
-
407
-
113
-static bool tb_lookup_cmp(const void *p, const void *d)
408
-static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
114
-{
409
- [MO_UB] = STB,
115
- const TranslationBlock *tb = p;
410
-
116
- const struct tb_desc *desc = d;
411
- [MO_BEUW] = STH,
117
-
412
- [MO_BEUL] = STW,
118
- if (tb->pc == desc->pc &&
413
- [MO_BEUQ] = STX,
119
- tb->page_addr[0] == desc->phys_page1 &&
414
-
120
- tb->cs_base == desc->cs_base &&
415
- [MO_LEUW] = STH_LE,
121
- tb->flags == desc->flags &&
416
- [MO_LEUL] = STW_LE,
122
- tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
417
- [MO_LEUQ] = STX_LE,
123
- tb_cflags(tb) == desc->cflags) {
418
-};
124
- /* check next page if needed */
419
125
- if (tb->page_addr[1] == -1) {
420
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
126
- return true;
421
MemOpIdx oi, TCGType data_type)
422
{
423
- MemOp memop = get_memop(oi);
424
- tcg_insn_unit *label_ptr;
425
+ static const int ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
426
+ [MO_UB] = LDUB,
427
+ [MO_SB] = LDSB,
428
+ [MO_UB | MO_LE] = LDUB,
429
+ [MO_SB | MO_LE] = LDSB,
430
431
-#ifdef CONFIG_SOFTMMU
432
- unsigned memi = get_mmuidx(oi);
433
- TCGReg addrz;
434
- const tcg_insn_unit *func;
435
+ [MO_BEUW] = LDUH,
436
+ [MO_BESW] = LDSH,
437
+ [MO_BEUL] = LDUW,
438
+ [MO_BESL] = LDSW,
439
+ [MO_BEUQ] = LDX,
440
+ [MO_BESQ] = LDX,
441
442
- addrz = tcg_out_tlb_load(s, addr, memi, memop,
443
- offsetof(CPUTLBEntry, addr_read));
444
+ [MO_LEUW] = LDUH_LE,
445
+ [MO_LESW] = LDSH_LE,
446
+ [MO_LEUL] = LDUW_LE,
447
+ [MO_LESL] = LDSW_LE,
448
+ [MO_LEUQ] = LDX_LE,
449
+ [MO_LESQ] = LDX_LE,
450
+ };
451
452
- /* The fast path is exactly one insn. Thus we can perform the
453
- entire TLB Hit in the (annulled) delay slot of the branch
454
- over the TLB Miss case. */
455
+ TCGLabelQemuLdst *ldst;
456
+ HostAddress h;
457
458
- /* beq,a,pt %[xi]cc, label0 */
459
- label_ptr = s->code_ptr;
460
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
461
- | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
462
- /* delay slot */
463
- tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
464
- qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
465
+ ldst = prepare_host_addr(s, &h, addr, oi, true);
466
467
- /* TLB Miss. */
468
+ tcg_out_ldst_rr(s, data, h.base, h.index,
469
+ ld_opc[get_memop(oi) & (MO_BSWAP | MO_SSIZE)]);
470
471
- tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
472
-
473
- /* We use the helpers to extend SB and SW data, leaving the case
474
- of SL needing explicit extending below. */
475
- if ((memop & MO_SSIZE) == MO_SL) {
476
- func = qemu_ld_trampoline[MO_UL];
477
- } else {
478
- func = qemu_ld_trampoline[memop & MO_SSIZE];
479
+ if (ldst) {
480
+ ldst->type = data_type;
481
+ ldst->datalo_reg = data;
482
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
483
}
484
- tcg_debug_assert(func != NULL);
485
- tcg_out_call_nodelay(s, func, false);
486
- /* delay slot */
487
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi);
488
-
489
- /* We let the helper sign-extend SB and SW, but leave SL for here. */
490
- if ((memop & MO_SSIZE) == MO_SL) {
491
- tcg_out_ext32s(s, data, TCG_REG_O0);
492
- } else {
493
- tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
494
- }
495
-
496
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
497
-#else
498
- TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0);
499
- unsigned a_bits = get_alignment_bits(memop);
500
- unsigned s_bits = memop & MO_SIZE;
501
- unsigned t_bits;
502
-
503
- if (TARGET_LONG_BITS == 32) {
504
- tcg_out_ext32u(s, TCG_REG_T1, addr);
505
- addr = TCG_REG_T1;
506
- }
507
-
508
- /*
509
- * Normal case: alignment equal to access size.
510
- */
511
- if (a_bits == s_bits) {
512
- tcg_out_ldst_rr(s, data, addr, index,
513
- qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
514
- return;
515
- }
516
-
517
- /*
518
- * Test for at least natural alignment, and assume most accesses
519
- * will be aligned -- perform a straight load in the delay slot.
520
- * This is required to preserve atomicity for aligned accesses.
521
- */
522
- t_bits = MAX(a_bits, s_bits);
523
- tcg_debug_assert(t_bits < 13);
524
- tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
525
-
526
- /* beq,a,pt %icc, label */
527
- label_ptr = s->code_ptr;
528
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
529
- /* delay slot */
530
- tcg_out_ldst_rr(s, data, addr, index,
531
- qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
532
-
533
- if (a_bits >= s_bits) {
534
- /*
535
- * Overalignment: A successful alignment test will perform the memory
536
- * operation in the delay slot, and failure need only invoke the
537
- * handler for SIGBUS.
538
- */
539
- tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false);
540
- /* delay slot -- move to low part of argument reg */
541
- tcg_out_mov_delay(s, TCG_REG_O1, addr);
542
- } else {
543
- /* Underalignment: load by pieces of minimum alignment. */
544
- int ld_opc, a_size, s_size, i;
545
-
546
- /*
547
- * Force full address into T1 early; avoids problems with
548
- * overlap between @addr and @data.
549
- */
550
- tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
551
-
552
- a_size = 1 << a_bits;
553
- s_size = 1 << s_bits;
554
- if ((memop & MO_BSWAP) == MO_BE) {
555
- ld_opc = qemu_ld_opc[a_bits | MO_BE | (memop & MO_SIGN)];
556
- tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
557
- ld_opc = qemu_ld_opc[a_bits | MO_BE];
558
- for (i = a_size; i < s_size; i += a_size) {
559
- tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
560
- tcg_out_arithi(s, data, data, a_size, SHIFT_SLLX);
561
- tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
562
- }
563
- } else if (a_bits == 0) {
564
- ld_opc = LDUB;
565
- tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
566
- for (i = a_size; i < s_size; i += a_size) {
567
- if ((memop & MO_SIGN) && i == s_size - a_size) {
568
- ld_opc = LDSB;
569
- }
570
- tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
571
- tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
572
- tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
573
- }
127
- } else {
574
- } else {
128
- tb_page_addr_t phys_page2;
575
- ld_opc = qemu_ld_opc[a_bits | MO_LE];
129
- target_ulong virt_page2;
576
- tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, ld_opc);
130
-
577
- for (i = a_size; i < s_size; i += a_size) {
131
- virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
578
- tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
132
- phys_page2 = get_page_addr_code(desc->env, virt_page2);
579
- if ((memop & MO_SIGN) && i == s_size - a_size) {
133
- if (tb->page_addr[1] == phys_page2) {
580
- ld_opc = qemu_ld_opc[a_bits | MO_LE | MO_SIGN];
134
- return true;
581
- }
582
- tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, ld_opc);
583
- tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
584
- tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
135
- }
585
- }
136
- }
586
- }
137
- }
587
- }
138
- return false;
588
-
139
-}
589
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
140
-
590
-#endif /* CONFIG_SOFTMMU */
141
-TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
591
}
142
- target_ulong cs_base, uint32_t flags,
592
143
- uint32_t cflags)
593
static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
144
-{
594
MemOpIdx oi, TCGType data_type)
145
- tb_page_addr_t phys_pc;
146
- struct tb_desc desc;
147
- uint32_t h;
148
-
149
- desc.env = cpu->env_ptr;
150
- desc.cs_base = cs_base;
151
- desc.flags = flags;
152
- desc.cflags = cflags;
153
- desc.trace_vcpu_dstate = *cpu->trace_dstate;
154
- desc.pc = pc;
155
- phys_pc = get_page_addr_code(desc.env, pc);
156
- if (phys_pc == -1) {
157
- return NULL;
158
- }
159
- desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
160
- h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
161
- return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
162
-}
163
-
164
void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
165
{
595
{
166
if (TCG_TARGET_HAS_direct_jump) {
596
- MemOp memop = get_memop(oi);
597
- tcg_insn_unit *label_ptr;
598
+ static const int st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
599
+ [MO_UB] = STB,
600
601
-#ifdef CONFIG_SOFTMMU
602
- unsigned memi = get_mmuidx(oi);
603
- TCGReg addrz;
604
- const tcg_insn_unit *func;
605
+ [MO_BEUW] = STH,
606
+ [MO_BEUL] = STW,
607
+ [MO_BEUQ] = STX,
608
609
- addrz = tcg_out_tlb_load(s, addr, memi, memop,
610
- offsetof(CPUTLBEntry, addr_write));
611
+ [MO_LEUW] = STH_LE,
612
+ [MO_LEUL] = STW_LE,
613
+ [MO_LEUQ] = STX_LE,
614
+ };
615
616
- /* The fast path is exactly one insn. Thus we can perform the entire
617
- TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
618
- /* beq,a,pt %[xi]cc, label0 */
619
- label_ptr = s->code_ptr;
620
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
621
- | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
622
- /* delay slot */
623
- tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
624
- qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
625
+ TCGLabelQemuLdst *ldst;
626
+ HostAddress h;
627
628
- /* TLB Miss. */
629
+ ldst = prepare_host_addr(s, &h, addr, oi, false);
630
631
- tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
632
- tcg_out_movext(s, (memop & MO_SIZE) == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
633
- TCG_REG_O2, data_type, memop & MO_SIZE, data);
634
+ tcg_out_ldst_rr(s, data, h.base, h.index,
635
+ st_opc[get_memop(oi) & (MO_BSWAP | MO_SIZE)]);
636
637
- func = qemu_st_trampoline[memop & MO_SIZE];
638
- tcg_debug_assert(func != NULL);
639
- tcg_out_call_nodelay(s, func, false);
640
- /* delay slot */
641
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi);
642
-
643
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
644
-#else
645
- TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0);
646
- unsigned a_bits = get_alignment_bits(memop);
647
- unsigned s_bits = memop & MO_SIZE;
648
- unsigned t_bits;
649
-
650
- if (TARGET_LONG_BITS == 32) {
651
- tcg_out_ext32u(s, TCG_REG_T1, addr);
652
- addr = TCG_REG_T1;
653
+ if (ldst) {
654
+ ldst->type = data_type;
655
+ ldst->datalo_reg = data;
656
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
657
}
658
-
659
- /*
660
- * Normal case: alignment equal to access size.
661
- */
662
- if (a_bits == s_bits) {
663
- tcg_out_ldst_rr(s, data, addr, index,
664
- qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
665
- return;
666
- }
667
-
668
- /*
669
- * Test for at least natural alignment, and assume most accesses
670
- * will be aligned -- perform a straight store in the delay slot.
671
- * This is required to preserve atomicity for aligned accesses.
672
- */
673
- t_bits = MAX(a_bits, s_bits);
674
- tcg_debug_assert(t_bits < 13);
675
- tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
676
-
677
- /* beq,a,pt %icc, label */
678
- label_ptr = s->code_ptr;
679
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
680
- /* delay slot */
681
- tcg_out_ldst_rr(s, data, addr, index,
682
- qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
683
-
684
- if (a_bits >= s_bits) {
685
- /*
686
- * Overalignment: A successful alignment test will perform the memory
687
- * operation in the delay slot, and failure need only invoke the
688
- * handler for SIGBUS.
689
- */
690
- tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false);
691
- /* delay slot -- move to low part of argument reg */
692
- tcg_out_mov_delay(s, TCG_REG_O1, addr);
693
- } else {
694
- /* Underalignment: store by pieces of minimum alignment. */
695
- int st_opc, a_size, s_size, i;
696
-
697
- /*
698
- * Force full address into T1 early; avoids problems with
699
- * overlap between @addr and @data.
700
- */
701
- tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
702
-
703
- a_size = 1 << a_bits;
704
- s_size = 1 << s_bits;
705
- if ((memop & MO_BSWAP) == MO_BE) {
706
- st_opc = qemu_st_opc[a_bits | MO_BE];
707
- for (i = 0; i < s_size; i += a_size) {
708
- TCGReg d = data;
709
- int shift = (s_size - a_size - i) * 8;
710
- if (shift) {
711
- d = TCG_REG_T2;
712
- tcg_out_arithi(s, d, data, shift, SHIFT_SRLX);
713
- }
714
- tcg_out_ldst(s, d, TCG_REG_T1, i, st_opc);
715
- }
716
- } else if (a_bits == 0) {
717
- tcg_out_ldst(s, data, TCG_REG_T1, 0, STB);
718
- for (i = 1; i < s_size; i++) {
719
- tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX);
720
- tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, STB);
721
- }
722
- } else {
723
- /* Note that ST*A with immediate asi must use indexed address. */
724
- st_opc = qemu_st_opc[a_bits + MO_LE];
725
- tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, st_opc);
726
- for (i = a_size; i < s_size; i += a_size) {
727
- tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX);
728
- tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
729
- tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, st_opc);
730
- }
731
- }
732
- }
733
-
734
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
735
-#endif /* CONFIG_SOFTMMU */
736
}
737
738
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
739
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
740
case INDEX_op_extu_i32_i64:
741
case INDEX_op_extrl_i64_i32:
742
case INDEX_op_extrh_i64_i32:
743
+ case INDEX_op_qemu_ld_i32:
744
+ case INDEX_op_qemu_ld_i64:
745
return C_O1_I1(r, r);
746
747
case INDEX_op_st8_i32:
748
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
749
case INDEX_op_st_i32:
750
case INDEX_op_st32_i64:
751
case INDEX_op_st_i64:
752
+ case INDEX_op_qemu_st_i32:
753
+ case INDEX_op_qemu_st_i64:
754
return C_O0_I2(rZ, r);
755
756
case INDEX_op_add_i32:
757
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
758
case INDEX_op_muluh_i64:
759
return C_O1_I2(r, r, r);
760
761
- case INDEX_op_qemu_ld_i32:
762
- case INDEX_op_qemu_ld_i64:
763
- return C_O1_I1(r, s);
764
- case INDEX_op_qemu_st_i32:
765
- case INDEX_op_qemu_st_i64:
766
- return C_O0_I2(sZ, s);
767
-
768
default:
769
g_assert_not_reached();
770
}
167
--
771
--
168
2.34.1
772
2.34.1
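In the user-only arm of prepare_host_addr() above, the whole fast-path decision reduces to one mask test: alignment is raised to the access size and any set low bit diverts to the out-of-line helper. A standalone restatement (illustrative; needs_slow_path is a made-up name):

#include <stdbool.h>
#include <stdint.h>

/* Mirrors the ANDCC + bne,pn test emitted by the fast path. */
static bool needs_slow_path(uint64_t addr, unsigned a_bits, unsigned s_bits)
{
    unsigned bits = a_bits > s_bits ? a_bits : s_bits;
    uint64_t a_mask = (1u << bits) - 1;
    return (addr & a_mask) != 0;
}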
1
The mmap_lock is held around tb_gen_code. While the comment
1
These functions are now unused.
2
is correct that the lock is dropped when tb_gen_code runs out
3
of memory, the lock is *not* dropped when an exception is
4
raised reading code for translation.
5
2
6
Acked-by: Alistair Francis <alistair.francis@wdc.com>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
8
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
5
---
11
accel/tcg/cpu-exec.c | 12 ++++++------
6
include/tcg/tcg-ldst.h | 6 ------
12
accel/tcg/user-exec.c | 3 ---
7
accel/tcg/user-exec.c | 10 ----------
13
2 files changed, 6 insertions(+), 9 deletions(-)
8
2 files changed, 16 deletions(-)
14
9
15
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
10
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
16
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
17
--- a/accel/tcg/cpu-exec.c
12
--- a/include/tcg/tcg-ldst.h
18
+++ b/accel/tcg/cpu-exec.c
13
+++ b/include/tcg/tcg-ldst.h
19
@@ -XXX,XX +XXX,XX @@ void cpu_exec_step_atomic(CPUState *cpu)
14
@@ -XXX,XX +XXX,XX @@ void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
20
cpu_tb_exec(cpu, tb, &tb_exit);
15
void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
21
cpu_exec_exit(cpu);
16
MemOpIdx oi, uintptr_t retaddr);
22
} else {
17
23
- /*
18
-#ifdef CONFIG_USER_ONLY
24
- * The mmap_lock is dropped by tb_gen_code if it runs out of
19
-
25
- * memory.
20
-G_NORETURN void helper_unaligned_ld(CPUArchState *env, target_ulong addr);
26
- */
21
-G_NORETURN void helper_unaligned_st(CPUArchState *env, target_ulong addr);
27
#ifndef CONFIG_SOFTMMU
22
-
28
clear_helper_retaddr();
23
-#endif /* CONFIG_USER_ONLY */
29
- tcg_debug_assert(!have_mmap_lock());
24
#endif /* TCG_LDST_H */
30
+ if (have_mmap_lock()) {
31
+ mmap_unlock();
32
+ }
33
#endif
34
if (qemu_mutex_iothread_locked()) {
35
qemu_mutex_unlock_iothread();
36
@@ -XXX,XX +XXX,XX @@ int cpu_exec(CPUState *cpu)
37
38
#ifndef CONFIG_SOFTMMU
39
clear_helper_retaddr();
40
- tcg_debug_assert(!have_mmap_lock());
41
+ if (have_mmap_lock()) {
42
+ mmap_unlock();
43
+ }
44
#endif
45
if (qemu_mutex_iothread_locked()) {
46
qemu_mutex_unlock_iothread();
47
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
25
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
48
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
49
--- a/accel/tcg/user-exec.c
27
--- a/accel/tcg/user-exec.c
50
+++ b/accel/tcg/user-exec.c
28
+++ b/accel/tcg/user-exec.c
51
@@ -XXX,XX +XXX,XX @@ MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
29
@@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong last) { }
52
* (and if the translator doesn't handle page boundaries correctly
30
53
* there's little we can do about that here). Therefore, do not
31
/* The softmmu versions of these helpers are in cputlb.c. */
54
* trigger the unwinder.
32
55
- *
33
-void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
56
- * Like tb_gen_code, release the memory lock before cpu_loop_exit.
34
-{
57
*/
35
- cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
58
- mmap_unlock();
36
-}
59
*pc = 0;
37
-
60
return MMU_INST_FETCH;
38
-void helper_unaligned_st(CPUArchState *env, target_ulong addr)
61
}
39
-{
40
- cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
41
-}
42
-
43
static void *cpu_mmu_lookup(CPUArchState *env, abi_ptr addr,
44
MemOp mop, uintptr_t ra, MMUAccessType type)
45
{
62
--
46
--
63
2.34.1
47
2.34.1
New patch
1
This should be true of all loongarch64 hosts running Linux.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/loongarch64/tcg-target.c.inc | 9 +++++++++
7
1 file changed, 9 insertions(+)
8
9
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/loongarch64/tcg-target.c.inc
12
+++ b/tcg/loongarch64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@
14
*/
15
16
#include "../tcg-ldst.c.inc"
17
+#include <asm/hwcap.h>
18
19
#ifdef CONFIG_DEBUG_TCG
20
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
21
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
22
23
static void tcg_target_init(TCGContext *s)
24
{
25
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
26
+
27
+ /* Server and desktop class cpus have UAL; embedded cpus do not. */
28
+ if (!(hwcap & HWCAP_LOONGARCH_UAL)) {
29
+ error_report("TCG: unaligned access support required; exiting");
30
+ exit(EXIT_FAILURE);
31
+ }
32
+
33
tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
34
tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
35
36
--
37
2.34.1
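The same AT_HWCAP probe can be reproduced in a plain user program, which may help when checking a particular host. A hedged standalone example, only buildable with a loongarch64 Linux toolchain where <asm/hwcap.h> defines HWCAP_LOONGARCH_UAL, as in the patch above:

#include <stdio.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>

int main(void)
{
    unsigned long hwcap = getauxval(AT_HWCAP);
    printf("unaligned access (UAL): %s\n",
           (hwcap & HWCAP_LOONGARCH_UAL) ? "supported" : "not supported");
    return 0;
}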
New patch
1
Test the final byte of an unaligned access.
2
Use BSTRINS.D to clear the range of bits, rather than AND.
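To make the check concrete: for an access of 1 << s_bits bytes with 1 << a_bits alignment, adding s_mask - a_mask moves the address onto (the aligned granule containing) its last byte, and clearing the in-page bits above the alignment bits (the BSTRINS.D in the hunk below) yields a value that only equals the page-aligned TLB tag when the access is sufficiently aligned and does not cross a page. A hedged sketch of that arithmetic (tlb_compare_addr is a made-up name, not QEMU code):

#include <stdint.h>

/* Value compared against the TLB tag, per the scheme described above. */
static uint64_t tlb_compare_addr(uint64_t addr, unsigned a_bits,
                                 unsigned s_bits, unsigned page_bits)
{
    uint64_t a_mask = (1u << a_bits) - 1;
    uint64_t s_mask = (1u << s_bits) - 1;

    if (a_bits < s_bits) {
        /* Unaligned permitted: check the last byte of the access. */
        addr += s_mask - a_mask;
    }
    /* Clear bits [a_bits, page_bits - 1]; keep alignment and page bits. */
    uint64_t clear = ((1ull << page_bits) - 1) & ~a_mask;
    return addr & ~clear;
}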
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/loongarch64/tcg-target.c.inc | 19 ++++++++++++-------
8
1 file changed, 12 insertions(+), 7 deletions(-)
9
10
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/loongarch64/tcg-target.c.inc
13
+++ b/tcg/loongarch64/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
15
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
16
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
17
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
18
- tcg_target_long compare_mask;
19
20
ldst = new_ldst_label(s);
21
ldst->is_ld = is_ld;
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
23
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
24
offsetof(CPUTLBEntry, addend));
25
26
- /* We don't support unaligned accesses. */
27
+ /*
28
+ * For aligned accesses, we check the first byte and include the alignment
29
+ * bits within the address. For unaligned access, we check that we don't
30
+ * cross pages using the address of the last byte of the access.
31
+ */
32
if (a_bits < s_bits) {
33
- a_bits = s_bits;
34
+ unsigned a_mask = (1u << a_bits) - 1;
35
+ unsigned s_mask = (1u << s_bits) - 1;
36
+ tcg_out_addi(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
37
+ } else {
38
+ tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg);
39
}
40
- /* Clear the non-page, non-alignment bits from the address. */
41
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
42
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
43
- tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
44
+ tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
45
+ a_bits, TARGET_PAGE_BITS - 1);
46
47
/* Compare masked address with the TLB entry. */
48
ldst->label_ptr[0] = s->code_ptr;
49
--
50
2.34.1
New patch
1
The system is required to emulate unaligned accesses, even if the
2
hardware does not support it. The resulting trap may or may not
3
be more efficient than the qemu slow path. There are linux kernel
4
patches in flight to allow userspace to query hardware support;
5
we can re-evaluate whether to enable this by default after that.
1
6
7
In the meantime, softmmu now matches useronly, where we already
8
assumed that unaligned accesses are supported.
9
10
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
tcg/riscv/tcg-target.c.inc | 48 ++++++++++++++++++++++----------------
14
1 file changed, 28 insertions(+), 20 deletions(-)
15
16
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
17
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/riscv/tcg-target.c.inc
19
+++ b/tcg/riscv/tcg-target.c.inc
20
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
21
22
#ifdef CONFIG_SOFTMMU
23
unsigned s_bits = opc & MO_SIZE;
24
+ unsigned s_mask = (1u << s_bits) - 1;
25
int mem_index = get_mmuidx(oi);
26
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
27
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
28
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
29
- TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
30
- tcg_target_long compare_mask;
31
+ int compare_mask;
32
+ TCGReg addr_adj;
33
34
ldst = new_ldst_label(s);
35
ldst->is_ld = is_ld;
36
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
37
38
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
39
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
40
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
41
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
42
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
43
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
44
45
tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
46
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
47
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
48
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
49
50
+ /*
51
+ * For aligned accesses, we check the first byte and include the alignment
52
+ * bits within the address. For unaligned access, we check that we don't
53
+ * cross pages using the address of the last byte of the access.
54
+ */
55
+ addr_adj = addr_reg;
56
+ if (a_bits < s_bits) {
57
+ addr_adj = TCG_REG_TMP0;
58
+ tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
59
+ addr_adj, addr_reg, s_mask - a_mask);
60
+ }
61
+ compare_mask = TARGET_PAGE_MASK | a_mask;
62
+ if (compare_mask == sextreg(compare_mask, 0, 12)) {
63
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
64
+ } else {
65
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
66
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
67
+ }
68
+
69
/* Load the tlb comparator and the addend. */
70
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
71
is_ld ? offsetof(CPUTLBEntry, addr_read)
72
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
73
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
74
offsetof(CPUTLBEntry, addend));
75
76
- /* We don't support unaligned accesses. */
77
- if (a_bits < s_bits) {
78
- a_bits = s_bits;
79
- }
80
- /* Clear the non-page, non-alignment bits from the address. */
81
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | a_mask;
82
- if (compare_mask == sextreg(compare_mask, 0, 12)) {
83
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, compare_mask);
84
- } else {
85
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
86
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
87
- }
88
-
89
/* Compare masked address with the TLB entry. */
90
ldst->label_ptr[0] = s->code_ptr;
91
tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
92
93
/* TLB Hit - translate address using addend. */
94
+ addr_adj = addr_reg;
95
if (TARGET_LONG_BITS == 32) {
96
- tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
97
- addr_reg = TCG_REG_TMP0;
98
+ addr_adj = TCG_REG_TMP0;
99
+ tcg_out_ext32u(s, addr_adj, addr_reg);
100
}
101
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_reg);
102
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_adj);
103
*pbase = TCG_REG_TMP0;
104
#else
105
if (a_mask) {
106
--
107
2.34.1
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
1
Replace the unparameterized TCG_TARGET_HAS_MEMORY_BSWAP macro
2
with a function that takes a memop argument.
2
3
3
Currently it's possible to execute pages that do not have PAGE_EXEC
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
if there is an existing translation block. Fix by invalidating TBs
5
that touch the affected pages.
6
7
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
8
Message-Id: <20220817150506.592862-2-iii@linux.ibm.com>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
6
---
11
linux-user/mmap.c | 6 ++++--
7
tcg/aarch64/tcg-target.h | 1 -
12
1 file changed, 4 insertions(+), 2 deletions(-)
8
tcg/arm/tcg-target.h | 1 -
9
tcg/i386/tcg-target.h | 3 ---
10
tcg/loongarch64/tcg-target.h | 2 --
11
tcg/mips/tcg-target.h | 2 --
12
tcg/ppc/tcg-target.h | 1 -
13
tcg/riscv/tcg-target.h | 2 --
14
tcg/s390x/tcg-target.h | 2 --
15
tcg/sparc64/tcg-target.h | 1 -
16
tcg/tcg-internal.h | 2 ++
17
tcg/tci/tcg-target.h | 2 --
18
tcg/tcg-op.c | 20 +++++++++++---------
19
tcg/aarch64/tcg-target.c.inc | 5 +++++
20
tcg/arm/tcg-target.c.inc | 5 +++++
21
tcg/i386/tcg-target.c.inc | 5 +++++
22
tcg/loongarch64/tcg-target.c.inc | 5 +++++
23
tcg/mips/tcg-target.c.inc | 5 +++++
24
tcg/ppc/tcg-target.c.inc | 5 +++++
25
tcg/riscv/tcg-target.c.inc | 5 +++++
26
tcg/s390x/tcg-target.c.inc | 5 +++++
27
tcg/sparc64/tcg-target.c.inc | 5 +++++
28
tcg/tci/tcg-target.c.inc | 5 +++++
29
22 files changed, 63 insertions(+), 26 deletions(-)
13
30
14
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
31
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
15
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
16
--- a/linux-user/mmap.c
33
--- a/tcg/aarch64/tcg-target.h
17
+++ b/linux-user/mmap.c
34
+++ b/tcg/aarch64/tcg-target.h
18
@@ -XXX,XX +XXX,XX @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
35
@@ -XXX,XX +XXX,XX @@ extern bool have_lse2;
19
goto error;
36
#define TCG_TARGET_HAS_cmpsel_vec 0
20
}
37
38
#define TCG_TARGET_DEFAULT_MO (0)
39
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
40
#define TCG_TARGET_NEED_LDST_LABELS
41
#define TCG_TARGET_NEED_POOL_LABELS
42
43
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
44
index XXXXXXX..XXXXXXX 100644
45
--- a/tcg/arm/tcg-target.h
46
+++ b/tcg/arm/tcg-target.h
47
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
48
#define TCG_TARGET_HAS_cmpsel_vec 0
49
50
#define TCG_TARGET_DEFAULT_MO (0)
51
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
52
#define TCG_TARGET_NEED_LDST_LABELS
53
#define TCG_TARGET_NEED_POOL_LABELS
54
55
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/tcg/i386/tcg-target.h
58
+++ b/tcg/i386/tcg-target.h
59
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
60
#include "tcg/tcg-mo.h"
61
62
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
63
-
64
-#define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe
65
-
66
#define TCG_TARGET_NEED_LDST_LABELS
67
#define TCG_TARGET_NEED_POOL_LABELS
68
69
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
70
index XXXXXXX..XXXXXXX 100644
71
--- a/tcg/loongarch64/tcg-target.h
72
+++ b/tcg/loongarch64/tcg-target.h
73
@@ -XXX,XX +XXX,XX @@ typedef enum {
74
75
#define TCG_TARGET_NEED_LDST_LABELS
76
77
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
78
-
79
#endif /* LOONGARCH_TCG_TARGET_H */
80
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
81
index XXXXXXX..XXXXXXX 100644
82
--- a/tcg/mips/tcg-target.h
83
+++ b/tcg/mips/tcg-target.h
84
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
85
#endif
86
87
#define TCG_TARGET_DEFAULT_MO 0
88
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
89
-
90
#define TCG_TARGET_NEED_LDST_LABELS
91
92
#endif
93
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
94
index XXXXXXX..XXXXXXX 100644
95
--- a/tcg/ppc/tcg-target.h
96
+++ b/tcg/ppc/tcg-target.h
97
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
98
#define TCG_TARGET_HAS_cmpsel_vec 0
99
100
#define TCG_TARGET_DEFAULT_MO (0)
101
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
102
#define TCG_TARGET_NEED_LDST_LABELS
103
#define TCG_TARGET_NEED_POOL_LABELS
104
105
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
106
index XXXXXXX..XXXXXXX 100644
107
--- a/tcg/riscv/tcg-target.h
108
+++ b/tcg/riscv/tcg-target.h
109
@@ -XXX,XX +XXX,XX @@ typedef enum {
110
#define TCG_TARGET_NEED_LDST_LABELS
111
#define TCG_TARGET_NEED_POOL_LABELS
112
113
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
114
-
115
#endif
116
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
117
index XXXXXXX..XXXXXXX 100644
118
--- a/tcg/s390x/tcg-target.h
119
+++ b/tcg/s390x/tcg-target.h
120
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
121
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
122
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
123
124
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
125
-
126
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
127
#define TCG_TARGET_NEED_LDST_LABELS
128
#define TCG_TARGET_NEED_POOL_LABELS
129
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
130
index XXXXXXX..XXXXXXX 100644
131
--- a/tcg/sparc64/tcg-target.h
132
+++ b/tcg/sparc64/tcg-target.h
133
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
134
#define TCG_AREG0 TCG_REG_I0
135
136
#define TCG_TARGET_DEFAULT_MO (0)
137
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
138
#define TCG_TARGET_NEED_LDST_LABELS
139
#define TCG_TARGET_NEED_POOL_LABELS
140
141
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
142
index XXXXXXX..XXXXXXX 100644
143
--- a/tcg/tcg-internal.h
144
+++ b/tcg/tcg-internal.h
145
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t)
146
return temp_tcgv_i64(tcgv_i128_temp(t) + o);
147
}
148
149
+bool tcg_target_has_memory_bswap(MemOp memop);
150
+
151
#endif /* TCG_INTERNAL_H */
152
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
153
index XXXXXXX..XXXXXXX 100644
154
--- a/tcg/tci/tcg-target.h
155
+++ b/tcg/tci/tcg-target.h
156
@@ -XXX,XX +XXX,XX @@ typedef enum {
157
We prefer consistency across hosts on this. */
158
#define TCG_TARGET_DEFAULT_MO (0)
159
160
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
161
-
162
#endif /* TCG_TARGET_H */
163
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
164
index XXXXXXX..XXXXXXX 100644
165
--- a/tcg/tcg-op.c
166
+++ b/tcg/tcg-op.c
167
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
168
oi = make_memop_idx(memop, idx);
169
170
orig_memop = memop;
171
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
172
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
173
memop &= ~MO_BSWAP;
174
/* The bswap primitive benefits from zero-extended input. */
175
if ((memop & MO_SSIZE) == MO_SW) {
176
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
177
memop = tcg_canonicalize_memop(memop, 0, 1);
178
oi = make_memop_idx(memop, idx);
179
180
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
181
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
182
swap = tcg_temp_ebb_new_i32();
183
switch (memop & MO_SIZE) {
184
case MO_16:
185
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
186
oi = make_memop_idx(memop, idx);
187
188
orig_memop = memop;
189
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
190
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
191
memop &= ~MO_BSWAP;
192
/* The bswap primitive benefits from zero-extended input. */
193
if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
194
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
195
memop = tcg_canonicalize_memop(memop, 1, 1);
196
oi = make_memop_idx(memop, idx);
197
198
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
199
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
200
swap = tcg_temp_ebb_new_i64();
201
switch (memop & MO_SIZE) {
202
case MO_16:
203
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
204
tcg_debug_assert((orig & MO_SIZE) == MO_128);
205
tcg_debug_assert((orig & MO_SIGN) == 0);
206
207
- /* Use a memory ordering implemented by the host. */
208
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
209
- mop_1 &= ~MO_BSWAP;
210
- }
211
-
212
/* Reduce the size to 64-bit. */
213
mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
214
215
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
216
default:
217
g_assert_not_reached();
21
}
218
}
22
+
219
+
23
page_set_flags(start, start + len, page_flags);
220
+ /* Use a memory ordering implemented by the host. */
24
- mmap_unlock();
221
+ if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
25
- return 0;
222
+ mop_1 &= ~MO_BSWAP;
26
+ tb_invalidate_phys_range(start, start + len);
223
+ mop_2 &= ~MO_BSWAP;
27
+ ret = 0;
224
+ }
28
+
225
+
29
error:
226
ret[0] = mop_1;
30
mmap_unlock();
227
ret[1] = mop_2;
31
return ret;
228
}
229
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
230
index XXXXXXX..XXXXXXX 100644
231
--- a/tcg/aarch64/tcg-target.c.inc
232
+++ b/tcg/aarch64/tcg-target.c.inc
233
@@ -XXX,XX +XXX,XX @@ typedef struct {
234
TCGType index_ext;
235
} HostAddress;
236
237
+bool tcg_target_has_memory_bswap(MemOp memop)
238
+{
239
+ return false;
240
+}
241
+
242
static const TCGLdstHelperParam ldst_helper_param = {
243
.ntmp = 1, .tmp = { TCG_REG_TMP }
244
};
245
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
246
index XXXXXXX..XXXXXXX 100644
247
--- a/tcg/arm/tcg-target.c.inc
248
+++ b/tcg/arm/tcg-target.c.inc
249
@@ -XXX,XX +XXX,XX @@ typedef struct {
250
bool index_scratch;
251
} HostAddress;
252
253
+bool tcg_target_has_memory_bswap(MemOp memop)
254
+{
255
+ return false;
256
+}
257
+
258
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
259
{
260
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
261
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
262
index XXXXXXX..XXXXXXX 100644
263
--- a/tcg/i386/tcg-target.c.inc
264
+++ b/tcg/i386/tcg-target.c.inc
265
@@ -XXX,XX +XXX,XX @@ typedef struct {
266
int seg;
267
} HostAddress;
268
269
+bool tcg_target_has_memory_bswap(MemOp memop)
270
+{
271
+ return have_movbe;
272
+}
273
+
274
/*
275
* Because i686 has no register parameters and because x86_64 has xchg
276
* to handle addr/data register overlap, we have placed all input arguments
277
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
278
index XXXXXXX..XXXXXXX 100644
279
--- a/tcg/loongarch64/tcg-target.c.inc
280
+++ b/tcg/loongarch64/tcg-target.c.inc
281
@@ -XXX,XX +XXX,XX @@ typedef struct {
282
TCGReg index;
283
} HostAddress;
284
285
+bool tcg_target_has_memory_bswap(MemOp memop)
286
+{
287
+ return false;
288
+}
289
+
290
/*
291
* For softmmu, perform the TLB load and compare.
292
* For useronly, perform any required alignment tests.
293
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
294
index XXXXXXX..XXXXXXX 100644
295
--- a/tcg/mips/tcg-target.c.inc
296
+++ b/tcg/mips/tcg-target.c.inc
297
@@ -XXX,XX +XXX,XX @@ typedef struct {
298
MemOp align;
299
} HostAddress;
300
301
+bool tcg_target_has_memory_bswap(MemOp memop)
302
+{
303
+ return false;
304
+}
305
+
306
/*
307
* For softmmu, perform the TLB load and compare.
308
* For useronly, perform any required alignment tests.
309
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
310
index XXXXXXX..XXXXXXX 100644
311
--- a/tcg/ppc/tcg-target.c.inc
312
+++ b/tcg/ppc/tcg-target.c.inc
313
@@ -XXX,XX +XXX,XX @@ typedef struct {
314
TCGReg index;
315
} HostAddress;
316
317
+bool tcg_target_has_memory_bswap(MemOp memop)
318
+{
319
+ return true;
320
+}
321
+
322
/*
323
* For softmmu, perform the TLB load and compare.
324
* For useronly, perform any required alignment tests.
325
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
326
index XXXXXXX..XXXXXXX 100644
327
--- a/tcg/riscv/tcg-target.c.inc
328
+++ b/tcg/riscv/tcg-target.c.inc
329
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
330
tcg_debug_assert(ok);
331
}
332
333
+bool tcg_target_has_memory_bswap(MemOp memop)
334
+{
335
+ return false;
336
+}
337
+
338
/* We have three temps, we might as well expose them. */
339
static const TCGLdstHelperParam ldst_helper_param = {
340
.ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
341
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
342
index XXXXXXX..XXXXXXX 100644
343
--- a/tcg/s390x/tcg-target.c.inc
344
+++ b/tcg/s390x/tcg-target.c.inc
345
@@ -XXX,XX +XXX,XX @@ typedef struct {
346
int disp;
347
} HostAddress;
348
349
+bool tcg_target_has_memory_bswap(MemOp memop)
350
+{
351
+ return true;
352
+}
353
+
354
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
355
HostAddress h)
356
{
357
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
358
index XXXXXXX..XXXXXXX 100644
359
--- a/tcg/sparc64/tcg-target.c.inc
360
+++ b/tcg/sparc64/tcg-target.c.inc
361
@@ -XXX,XX +XXX,XX @@ typedef struct {
362
TCGReg index;
363
} HostAddress;
364
365
+bool tcg_target_has_memory_bswap(MemOp memop)
366
+{
367
+ return true;
368
+}
369
+
370
/*
371
* For softmmu, perform the TLB load and compare.
372
* For useronly, perform any required alignment tests.
373
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
374
index XXXXXXX..XXXXXXX 100644
375
--- a/tcg/tci/tcg-target.c.inc
376
+++ b/tcg/tci/tcg-target.c.inc
377
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
378
static inline void tcg_target_qemu_prologue(TCGContext *s)
379
{
380
}
381
+
382
+bool tcg_target_has_memory_bswap(MemOp memop)
383
+{
384
+ return true;
385
+}
32
--
386
--
33
2.34.1
387
2.34.1
New patch
1
Add opcodes for backend support for 128-bit memory operations.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
docs/devel/tcg-ops.rst | 11 +++---
8
include/tcg/tcg-opc.h | 8 +++++
9
tcg/aarch64/tcg-target.h | 2 ++
10
tcg/arm/tcg-target.h | 2 ++
11
tcg/i386/tcg-target.h | 2 ++
12
tcg/loongarch64/tcg-target.h | 1 +
13
tcg/mips/tcg-target.h | 2 ++
14
tcg/ppc/tcg-target.h | 2 ++
15
tcg/riscv/tcg-target.h | 2 ++
16
tcg/s390x/tcg-target.h | 2 ++
17
tcg/sparc64/tcg-target.h | 2 ++
18
tcg/tci/tcg-target.h | 2 ++
19
tcg/tcg-op.c | 69 ++++++++++++++++++++++++++++++++----
20
tcg/tcg.c | 6 ++++
21
14 files changed, 103 insertions(+), 10 deletions(-)
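
As a usage sketch only (not part of this series): once a backend advertises TCG_TARGET_HAS_qemu_ldst_i128, a target frontend reaches the new ops through the tcg_gen_qemu_ld/st_i128 wrappers added below. The wrapper function name, the mmu_idx argument and the use of tcg_temp_new_i128() here are illustrative assumptions.

    /* Illustrative fragment, assuming a target frontend context. */
    static void gen_load_quad(TCGv addr, TCGArg mmu_idx)
    {
        TCGv_i128 val = tcg_temp_new_i128();

        /*
         * 16-byte little-endian guest load.  Backends without the new
         * opcode still work: tcg-op.c falls back to two 64-bit accesses
         * or a helper call.
         */
        tcg_gen_qemu_ld_i128(val, addr, mmu_idx, MO_LE | MO_128);

        /* ... consume val ... */
    }
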
22
23
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst
24
index XXXXXXX..XXXXXXX 100644
25
--- a/docs/devel/tcg-ops.rst
26
+++ b/docs/devel/tcg-ops.rst
27
@@ -XXX,XX +XXX,XX @@ QEMU specific operations
28
| This operation is optional. If the TCG backend does not implement the
29
goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
30
31
- * - qemu_ld_i32/i64 *t0*, *t1*, *flags*, *memidx*
32
+ * - qemu_ld_i32/i64/i128 *t0*, *t1*, *flags*, *memidx*
33
34
- qemu_st_i32/i64 *t0*, *t1*, *flags*, *memidx*
35
+ qemu_st_i32/i64/i128 *t0*, *t1*, *flags*, *memidx*
36
37
qemu_st8_i32 *t0*, *t1*, *flags*, *memidx*
38
39
- | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest
40
- address *t1*. The _i32/_i64 size applies to the size of the input/output
41
+ address *t1*. The _i32/_i64/_i128 size applies to the size of the input/output
42
register *t0* only. The address *t1* is always sized according to the guest,
43
and the width of the memory operation is controlled by *flags*.
44
|
45
| Both *t0* and *t1* may be split into little-endian ordered pairs of registers
46
- if dealing with 64-bit quantities on a 32-bit host.
47
+ if dealing with 64-bit quantities on a 32-bit host, or 128-bit quantities on
48
+ a 64-bit host.
49
|
50
| The *memidx* selects the qemu tlb index to use (e.g. user or kernel access).
51
The flags are the MemOp bits, selecting the sign, width, and endianness
52
@@ -XXX,XX +XXX,XX @@ QEMU specific operations
53
| For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
54
64-bit memory access specified in *flags*.
55
|
56
+ | For qemu_ld/st_i128, these are only supported for a 64-bit host.
57
+ |
58
| For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
59
the memory operation is known to be 8-bit. This allows the backend to
60
provide a different set of register constraints.
61
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
62
index XXXXXXX..XXXXXXX 100644
63
--- a/include/tcg/tcg-opc.h
64
+++ b/include/tcg/tcg-opc.h
65
@@ -XXX,XX +XXX,XX @@ DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
66
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
67
IMPL(TCG_TARGET_HAS_qemu_st8_i32))
68
69
+/* Only for 64-bit hosts at the moment. */
70
+DEF(qemu_ld_i128, 2, 1, 1,
71
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
72
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
73
+DEF(qemu_st_i128, 0, 3, 1,
74
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
75
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
76
+
77
/* Host vector support. */
78
79
#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
80
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
81
index XXXXXXX..XXXXXXX 100644
82
--- a/tcg/aarch64/tcg-target.h
83
+++ b/tcg/aarch64/tcg-target.h
84
@@ -XXX,XX +XXX,XX @@ extern bool have_lse2;
85
#define TCG_TARGET_HAS_muluh_i64 1
86
#define TCG_TARGET_HAS_mulsh_i64 1
87
88
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
89
+
90
#define TCG_TARGET_HAS_v64 1
91
#define TCG_TARGET_HAS_v128 1
92
#define TCG_TARGET_HAS_v256 0
93
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
94
index XXXXXXX..XXXXXXX 100644
95
--- a/tcg/arm/tcg-target.h
96
+++ b/tcg/arm/tcg-target.h
97
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
98
#define TCG_TARGET_HAS_rem_i32 0
99
#define TCG_TARGET_HAS_qemu_st8_i32 0
100
101
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
102
+
103
#define TCG_TARGET_HAS_v64 use_neon_instructions
104
#define TCG_TARGET_HAS_v128 use_neon_instructions
105
#define TCG_TARGET_HAS_v256 0
106
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
107
index XXXXXXX..XXXXXXX 100644
108
--- a/tcg/i386/tcg-target.h
109
+++ b/tcg/i386/tcg-target.h
110
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
111
#define TCG_TARGET_HAS_qemu_st8_i32 1
112
#endif
113
114
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
115
+
116
/* We do not support older SSE systems, only beginning with AVX1. */
117
#define TCG_TARGET_HAS_v64 have_avx1
118
#define TCG_TARGET_HAS_v128 have_avx1
119
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
120
index XXXXXXX..XXXXXXX 100644
121
--- a/tcg/loongarch64/tcg-target.h
122
+++ b/tcg/loongarch64/tcg-target.h
123
@@ -XXX,XX +XXX,XX @@ typedef enum {
124
#define TCG_TARGET_HAS_muls2_i64 0
125
#define TCG_TARGET_HAS_muluh_i64 1
126
#define TCG_TARGET_HAS_mulsh_i64 1
127
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
128
129
#define TCG_TARGET_DEFAULT_MO (0)
130
131
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
132
index XXXXXXX..XXXXXXX 100644
133
--- a/tcg/mips/tcg-target.h
134
+++ b/tcg/mips/tcg-target.h
135
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
136
#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
137
#endif
138
139
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
140
+
141
#define TCG_TARGET_DEFAULT_MO 0
142
#define TCG_TARGET_NEED_LDST_LABELS
143
144
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
145
index XXXXXXX..XXXXXXX 100644
146
--- a/tcg/ppc/tcg-target.h
147
+++ b/tcg/ppc/tcg-target.h
148
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
149
#define TCG_TARGET_HAS_mulsh_i64 1
150
#endif
151
152
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
153
+
154
/*
155
* While technically Altivec could support V64, it has no 64-bit store
156
* instruction and substituting two 32-bit stores makes the generated
157
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
158
index XXXXXXX..XXXXXXX 100644
159
--- a/tcg/riscv/tcg-target.h
160
+++ b/tcg/riscv/tcg-target.h
161
@@ -XXX,XX +XXX,XX @@ typedef enum {
162
#define TCG_TARGET_HAS_muluh_i64 1
163
#define TCG_TARGET_HAS_mulsh_i64 1
164
165
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
166
+
167
#define TCG_TARGET_DEFAULT_MO (0)
168
169
#define TCG_TARGET_NEED_LDST_LABELS
170
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
171
index XXXXXXX..XXXXXXX 100644
172
--- a/tcg/s390x/tcg-target.h
173
+++ b/tcg/s390x/tcg-target.h
174
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
175
#define TCG_TARGET_HAS_muluh_i64 0
176
#define TCG_TARGET_HAS_mulsh_i64 0
177
178
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
179
+
180
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
181
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
182
#define TCG_TARGET_HAS_v256 0
183
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
184
index XXXXXXX..XXXXXXX 100644
185
--- a/tcg/sparc64/tcg-target.h
186
+++ b/tcg/sparc64/tcg-target.h
187
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
188
#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions
189
#define TCG_TARGET_HAS_mulsh_i64 0
190
191
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
192
+
193
#define TCG_AREG0 TCG_REG_I0
194
195
#define TCG_TARGET_DEFAULT_MO (0)
196
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
197
index XXXXXXX..XXXXXXX 100644
198
--- a/tcg/tci/tcg-target.h
199
+++ b/tcg/tci/tcg-target.h
200
@@ -XXX,XX +XXX,XX @@
201
#define TCG_TARGET_HAS_mulu2_i32 1
202
#endif /* TCG_TARGET_REG_BITS == 64 */
203
204
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
205
+
206
/* Number of registers available. */
207
#define TCG_TARGET_NB_REGS 16
208
209
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
210
index XXXXXXX..XXXXXXX 100644
211
--- a/tcg/tcg-op.c
212
+++ b/tcg/tcg-op.c
213
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
214
215
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
216
{
217
- MemOpIdx oi = make_memop_idx(memop, idx);
218
+ const MemOpIdx oi = make_memop_idx(memop, idx);
219
220
tcg_debug_assert((memop & MO_SIZE) == MO_128);
221
tcg_debug_assert((memop & MO_SIGN) == 0);
222
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
223
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
224
addr = plugin_prep_mem_callbacks(addr);
225
226
- /* TODO: allow the tcg backend to see the whole operation. */
227
+ /* TODO: For now, force 32-bit hosts to use the helper. */
228
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
229
+ TCGv_i64 lo, hi;
230
+ TCGArg addr_arg;
231
+ MemOpIdx adj_oi;
232
+ bool need_bswap = false;
233
234
- if (use_two_i64_for_i128(memop)) {
235
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
236
+ lo = TCGV128_HIGH(val);
237
+ hi = TCGV128_LOW(val);
238
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
239
+ need_bswap = true;
240
+ } else {
241
+ lo = TCGV128_LOW(val);
242
+ hi = TCGV128_HIGH(val);
243
+ adj_oi = oi;
244
+ }
245
+
246
+#if TARGET_LONG_BITS == 32
247
+ addr_arg = tcgv_i32_arg(addr);
248
+#else
249
+ addr_arg = tcgv_i64_arg(addr);
250
+#endif
251
+ tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
252
+
253
+ if (need_bswap) {
254
+ tcg_gen_bswap64_i64(lo, lo);
255
+ tcg_gen_bswap64_i64(hi, hi);
256
+ }
257
+ } else if (use_two_i64_for_i128(memop)) {
258
MemOp mop[2];
259
TCGv addr_p8;
260
TCGv_i64 x, y;
261
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
262
263
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
264
{
265
- MemOpIdx oi = make_memop_idx(memop, idx);
266
+ const MemOpIdx oi = make_memop_idx(memop, idx);
267
268
tcg_debug_assert((memop & MO_SIZE) == MO_128);
269
tcg_debug_assert((memop & MO_SIGN) == 0);
270
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
271
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
272
addr = plugin_prep_mem_callbacks(addr);
273
274
- /* TODO: allow the tcg backend to see the whole operation. */
275
+ /* TODO: For now, force 32-bit hosts to use the helper. */
276
277
- if (use_two_i64_for_i128(memop)) {
278
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
279
+ TCGv_i64 lo, hi;
280
+ TCGArg addr_arg;
281
+ MemOpIdx adj_oi;
282
+ bool need_bswap = false;
283
+
284
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
285
+ lo = tcg_temp_new_i64();
286
+ hi = tcg_temp_new_i64();
287
+ tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
288
+ tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
289
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
290
+ need_bswap = true;
291
+ } else {
292
+ lo = TCGV128_LOW(val);
293
+ hi = TCGV128_HIGH(val);
294
+ adj_oi = oi;
295
+ }
296
+
297
+#if TARGET_LONG_BITS == 32
298
+ addr_arg = tcgv_i32_arg(addr);
299
+#else
300
+ addr_arg = tcgv_i64_arg(addr);
301
+#endif
302
+ tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
303
+
304
+ if (need_bswap) {
305
+ tcg_temp_free_i64(lo);
306
+ tcg_temp_free_i64(hi);
307
+ }
308
+ } else if (use_two_i64_for_i128(memop)) {
309
MemOp mop[2];
310
TCGv addr_p8;
311
TCGv_i64 x, y;
312
diff --git a/tcg/tcg.c b/tcg/tcg.c
313
index XXXXXXX..XXXXXXX 100644
314
--- a/tcg/tcg.c
315
+++ b/tcg/tcg.c
316
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
317
case INDEX_op_qemu_st8_i32:
318
return TCG_TARGET_HAS_qemu_st8_i32;
319
320
+ case INDEX_op_qemu_ld_i128:
321
+ case INDEX_op_qemu_st_i128:
322
+ return TCG_TARGET_HAS_qemu_ldst_i128;
323
+
324
case INDEX_op_mov_i32:
325
case INDEX_op_setcond_i32:
326
case INDEX_op_brcond_i32:
327
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
328
case INDEX_op_qemu_st8_i32:
329
case INDEX_op_qemu_ld_i64:
330
case INDEX_op_qemu_st_i64:
331
+ case INDEX_op_qemu_ld_i128:
332
+ case INDEX_op_qemu_st_i128:
333
{
334
const char *s_al, *s_op, *s_at;
335
MemOpIdx oi = op->args[k++];
336
--
337
2.34.1
338
339
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
1
With x86_64 as host, we do not have any temporaries with which to
2
resolve cycles, but we do have xchg. As a side bonus, the set of
3
graphs that can be made with 3 nodes and all nodes conflicting is
4
small: two. We can solve the cycle with a single temp.
2
5
3
Right now the translator stops right *after* the end of a page, which
6
This is required for x86_64 to handle stores of i128: 1 address
4
breaks reporting of fault locations when the last instruction of a
7
register and 2 data registers.
5
multi-insn translation block crosses a page boundary.
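
As a standalone illustration of the cycle case described above (plain C, not QEMU code): when three registers form a cycle and no swap instruction is available, a single scratch location is enough.

    /* Toy example: rotate three values around a 3-cycle with one temp. */
    static void rotate3(int *a, int *b, int *c)
    {
        int scratch = *a;   /* free up 'a' first */
        *a = *b;
        *b = *c;
        *c = scratch;       /* the saved value completes the cycle */
    }

With an xchg-style swap available the same cycle needs no scratch at all, which is the fast path the patch takes on x86_64.
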
6
8
7
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-Id: <20220817150506.592862-3-iii@linux.ibm.com>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
11
---
12
target/s390x/tcg/translate.c | 15 +++-
12
tcg/tcg.c | 138 ++++++++++++++++++++++++++++++++++++++++++------------
13
tests/tcg/s390x/noexec.c | 106 +++++++++++++++++++++++
13
1 file changed, 108 insertions(+), 30 deletions(-)
14
tests/tcg/multiarch/noexec.c.inc | 139 +++++++++++++++++++++++++++++++
15
tests/tcg/s390x/Makefile.target | 1 +
16
4 files changed, 257 insertions(+), 4 deletions(-)
17
create mode 100644 tests/tcg/s390x/noexec.c
18
create mode 100644 tests/tcg/multiarch/noexec.c.inc
19
14
20
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
15
diff --git a/tcg/tcg.c b/tcg/tcg.c
21
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
22
--- a/target/s390x/tcg/translate.c
17
--- a/tcg/tcg.c
23
+++ b/target/s390x/tcg/translate.c
18
+++ b/tcg/tcg.c
24
@@ -XXX,XX +XXX,XX @@ static void s390x_tr_insn_start(DisasContextBase *dcbase, CPUState *cs)
19
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
25
dc->insn_start = tcg_last_op();
20
tcg_out_movext1_new_src(s, i1, src1);
26
}
21
}
27
22
28
+static target_ulong get_next_pc(CPUS390XState *env, DisasContext *s,
23
+/**
29
+ uint64_t pc)
24
+ * tcg_out_movext3 -- move and extend three pair
30
+{
25
+ * @s: tcg context
31
+ uint64_t insn = ld_code2(env, s, pc);
26
+ * @i1: first move description
32
+
27
+ * @i2: second move description
33
+ return pc + get_ilen((insn >> 8) & 0xff);
28
+ * @i3: third move description
34
+}
29
+ * @scratch: temporary register, or -1 for none
35
+
30
+ *
36
static void s390x_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
31
+ * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
37
{
32
+ * between the sources and destinations.
38
CPUS390XState *env = cs->env_ptr;
39
@@ -XXX,XX +XXX,XX @@ static void s390x_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
40
41
dc->base.is_jmp = translate_one(env, dc);
42
if (dc->base.is_jmp == DISAS_NEXT) {
43
- uint64_t page_start;
44
-
45
- page_start = dc->base.pc_first & TARGET_PAGE_MASK;
46
- if (dc->base.pc_next - page_start >= TARGET_PAGE_SIZE || dc->ex_value) {
47
+ if (!is_same_page(dcbase, dc->base.pc_next) ||
48
+ !is_same_page(dcbase, get_next_pc(env, dc, dc->base.pc_next)) ||
49
+ dc->ex_value) {
50
dc->base.is_jmp = DISAS_TOO_MANY;
51
}
52
}
53
diff --git a/tests/tcg/s390x/noexec.c b/tests/tcg/s390x/noexec.c
54
new file mode 100644
55
index XXXXXXX..XXXXXXX
56
--- /dev/null
57
+++ b/tests/tcg/s390x/noexec.c
58
@@ -XXX,XX +XXX,XX @@
59
+#include "../multiarch/noexec.c.inc"
60
+
61
+static void *arch_mcontext_pc(const mcontext_t *ctx)
62
+{
63
+ return (void *)ctx->psw.addr;
64
+}
65
+
66
+static int arch_mcontext_arg(const mcontext_t *ctx)
67
+{
68
+ return ctx->gregs[2];
69
+}
70
+
71
+static void arch_flush(void *p, int len)
72
+{
73
+}
74
+
75
+extern char noexec_1[];
76
+extern char noexec_2[];
77
+extern char noexec_end[];
78
+
79
+asm("noexec_1:\n"
80
+ " lgfi %r2,1\n" /* %r2 is 0 on entry, set 1. */
81
+ "noexec_2:\n"
82
+ " lgfi %r2,2\n" /* %r2 is 0/1; set 2. */
83
+ " br %r14\n" /* return */
84
+ "noexec_end:");
85
+
86
+extern char exrl_1[];
87
+extern char exrl_2[];
88
+extern char exrl_end[];
89
+
90
+asm("exrl_1:\n"
91
+ " exrl %r0, exrl_2\n"
92
+ " br %r14\n"
93
+ "exrl_2:\n"
94
+ " lgfi %r2,2\n"
95
+ "exrl_end:");
96
+
97
+int main(void)
98
+{
99
+ struct noexec_test noexec_tests[] = {
100
+ {
101
+ .name = "fallthrough",
102
+ .test_code = noexec_1,
103
+ .test_len = noexec_end - noexec_1,
104
+ .page_ofs = noexec_1 - noexec_2,
105
+ .entry_ofs = noexec_1 - noexec_2,
106
+ .expected_si_ofs = 0,
107
+ .expected_pc_ofs = 0,
108
+ .expected_arg = 1,
109
+ },
110
+ {
111
+ .name = "jump",
112
+ .test_code = noexec_1,
113
+ .test_len = noexec_end - noexec_1,
114
+ .page_ofs = noexec_1 - noexec_2,
115
+ .entry_ofs = 0,
116
+ .expected_si_ofs = 0,
117
+ .expected_pc_ofs = 0,
118
+ .expected_arg = 0,
119
+ },
120
+ {
121
+ .name = "exrl",
122
+ .test_code = exrl_1,
123
+ .test_len = exrl_end - exrl_1,
124
+ .page_ofs = exrl_1 - exrl_2,
125
+ .entry_ofs = exrl_1 - exrl_2,
126
+ .expected_si_ofs = 0,
127
+ .expected_pc_ofs = exrl_1 - exrl_2,
128
+ .expected_arg = 0,
129
+ },
130
+ {
131
+ .name = "fallthrough [cross]",
132
+ .test_code = noexec_1,
133
+ .test_len = noexec_end - noexec_1,
134
+ .page_ofs = noexec_1 - noexec_2 - 2,
135
+ .entry_ofs = noexec_1 - noexec_2 - 2,
136
+ .expected_si_ofs = 0,
137
+ .expected_pc_ofs = -2,
138
+ .expected_arg = 1,
139
+ },
140
+ {
141
+ .name = "jump [cross]",
142
+ .test_code = noexec_1,
143
+ .test_len = noexec_end - noexec_1,
144
+ .page_ofs = noexec_1 - noexec_2 - 2,
145
+ .entry_ofs = -2,
146
+ .expected_si_ofs = 0,
147
+ .expected_pc_ofs = -2,
148
+ .expected_arg = 0,
149
+ },
150
+ {
151
+ .name = "exrl [cross]",
152
+ .test_code = exrl_1,
153
+ .test_len = exrl_end - exrl_1,
154
+ .page_ofs = exrl_1 - exrl_2 - 2,
155
+ .entry_ofs = exrl_1 - exrl_2 - 2,
156
+ .expected_si_ofs = 0,
157
+ .expected_pc_ofs = exrl_1 - exrl_2 - 2,
158
+ .expected_arg = 0,
159
+ },
160
+ };
161
+
162
+ return test_noexec(noexec_tests,
163
+ sizeof(noexec_tests) / sizeof(noexec_tests[0]));
164
+}
165
diff --git a/tests/tcg/multiarch/noexec.c.inc b/tests/tcg/multiarch/noexec.c.inc
166
new file mode 100644
167
index XXXXXXX..XXXXXXX
168
--- /dev/null
169
+++ b/tests/tcg/multiarch/noexec.c.inc
170
@@ -XXX,XX +XXX,XX @@
171
+/*
172
+ * Common code for arch-specific MMU_INST_FETCH fault testing.
173
+ */
33
+ */
174
+
34
+
175
+#define _GNU_SOURCE
35
+static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
36
+ const TCGMovExtend *i2, const TCGMovExtend *i3,
37
+ int scratch)
38
+{
39
+ TCGReg src1 = i1->src;
40
+ TCGReg src2 = i2->src;
41
+ TCGReg src3 = i3->src;
176
+
42
+
177
+#include <assert.h>
43
+ if (i1->dst != src2 && i1->dst != src3) {
178
+#include <signal.h>
44
+ tcg_out_movext1(s, i1);
179
+#include <stdio.h>
45
+ tcg_out_movext2(s, i2, i3, scratch);
180
+#include <stdlib.h>
46
+ return;
181
+#include <string.h>
182
+#include <errno.h>
183
+#include <unistd.h>
184
+#include <sys/mman.h>
185
+#include <sys/ucontext.h>
186
+
187
+/* Forward declarations. */
188
+
189
+static void *arch_mcontext_pc(const mcontext_t *ctx);
190
+static int arch_mcontext_arg(const mcontext_t *ctx);
191
+static void arch_flush(void *p, int len);
192
+
193
+/* Testing infrastructure. */
194
+
195
+struct noexec_test {
196
+ const char *name;
197
+ const char *test_code;
198
+ int test_len;
199
+ int page_ofs;
200
+ int entry_ofs;
201
+ int expected_si_ofs;
202
+ int expected_pc_ofs;
203
+ int expected_arg;
204
+};
205
+
206
+static void *page_base;
207
+static int page_size;
208
+static const struct noexec_test *current_noexec_test;
209
+
210
+static void handle_err(const char *syscall)
211
+{
212
+ printf("[ FAILED ] %s: %s\n", syscall, strerror(errno));
213
+ exit(EXIT_FAILURE);
214
+}
215
+
216
+static void handle_segv(int sig, siginfo_t *info, void *ucontext)
217
+{
218
+ const struct noexec_test *test = current_noexec_test;
219
+ const mcontext_t *mc = &((ucontext_t *)ucontext)->uc_mcontext;
220
+ void *expected_si;
221
+ void *expected_pc;
222
+ void *pc;
223
+ int arg;
224
+
225
+ if (test == NULL) {
226
+ printf("[ FAILED ] unexpected SEGV\n");
227
+ exit(EXIT_FAILURE);
228
+ }
47
+ }
229
+ current_noexec_test = NULL;
48
+ if (i2->dst != src1 && i2->dst != src3) {
230
+
49
+ tcg_out_movext1(s, i2);
231
+ expected_si = page_base + test->expected_si_ofs;
50
+ tcg_out_movext2(s, i1, i3, scratch);
232
+ if (info->si_addr != expected_si) {
51
+ return;
233
+ printf("[ FAILED ] wrong si_addr (%p != %p)\n",
52
+ }
234
+ info->si_addr, expected_si);
53
+ if (i3->dst != src1 && i3->dst != src2) {
235
+ exit(EXIT_FAILURE);
54
+ tcg_out_movext1(s, i3);
55
+ tcg_out_movext2(s, i1, i2, scratch);
56
+ return;
236
+ }
57
+ }
237
+
58
+
238
+ pc = arch_mcontext_pc(mc);
59
+ /*
239
+ expected_pc = page_base + test->expected_pc_ofs;
60
+ * There is a cycle. Since there are only 3 nodes, the cycle is
240
+ if (pc != expected_pc) {
61
+ * either "clockwise" or "anti-clockwise", and can be solved with
241
+ printf("[ FAILED ] wrong pc (%p != %p)\n", pc, expected_pc);
62
+ * a single scratch or two xchg.
242
+ exit(EXIT_FAILURE);
63
+ */
243
+ }
64
+ if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
244
+
65
+ /* "Clockwise" */
245
+ arg = arch_mcontext_arg(mc);
66
+ if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
246
+ if (arg != test->expected_arg) {
67
+ tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
247
+ printf("[ FAILED ] wrong arg (%d != %d)\n", arg, test->expected_arg);
68
+ /* The data is now in the correct registers, now extend. */
248
+ exit(EXIT_FAILURE);
69
+ tcg_out_movext1_new_src(s, i1, i1->dst);
249
+ }
70
+ tcg_out_movext1_new_src(s, i2, i2->dst);
250
+
71
+ tcg_out_movext1_new_src(s, i3, i3->dst);
251
+ if (mprotect(page_base, page_size,
72
+ } else {
252
+ PROT_READ | PROT_WRITE | PROT_EXEC) < 0) {
73
+ tcg_debug_assert(scratch >= 0);
253
+ handle_err("mprotect");
74
+ tcg_out_mov(s, i1->src_type, scratch, src1);
75
+ tcg_out_movext1(s, i3);
76
+ tcg_out_movext1(s, i2);
77
+ tcg_out_movext1_new_src(s, i1, scratch);
78
+ }
79
+ } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
80
+ /* "Anti-clockwise" */
81
+ if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
82
+ tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
83
+ /* The data is now in the correct registers, now extend. */
84
+ tcg_out_movext1_new_src(s, i1, i1->dst);
85
+ tcg_out_movext1_new_src(s, i2, i2->dst);
86
+ tcg_out_movext1_new_src(s, i3, i3->dst);
87
+ } else {
88
+ tcg_debug_assert(scratch >= 0);
89
+ tcg_out_mov(s, i1->src_type, scratch, src1);
90
+ tcg_out_movext1(s, i2);
91
+ tcg_out_movext1(s, i3);
92
+ tcg_out_movext1_new_src(s, i1, scratch);
93
+ }
94
+ } else {
95
+ g_assert_not_reached();
254
+ }
96
+ }
255
+}
97
+}
256
+
98
+
257
+static void test_noexec_1(const struct noexec_test *test)
99
#define C_PFX1(P, A) P##A
258
+{
100
#define C_PFX2(P, A, B) P##A##_##B
259
+ void *start = page_base + test->page_ofs;
101
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
260
+ void (*fn)(int arg) = page_base + test->entry_ofs;
102
@@ -XXX,XX +XXX,XX @@ static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
103
104
static void tcg_out_helper_load_regs(TCGContext *s,
105
unsigned nmov, TCGMovExtend *mov,
106
- unsigned ntmp, const int *tmp)
107
+ const TCGLdstHelperParam *parm)
108
{
109
+ TCGReg dst3;
261
+
110
+
262
+ memcpy(start, test->test_code, test->test_len);
111
switch (nmov) {
263
+ arch_flush(start, test->test_len);
112
- default:
113
+ case 4:
114
/* The backend must have provided enough temps for the worst case. */
115
- tcg_debug_assert(ntmp + 1 >= nmov);
116
+ tcg_debug_assert(parm->ntmp >= 2);
117
118
- for (unsigned i = nmov - 1; i >= 2; --i) {
119
- TCGReg dst = mov[i].dst;
120
+ dst3 = mov[3].dst;
121
+ for (unsigned j = 0; j < 3; ++j) {
122
+ if (dst3 == mov[j].src) {
123
+ /*
124
+ * Conflict. Copy the source to a temporary, perform the
125
+ * remaining moves, then the extension from our scratch
126
+ * on the way out.
127
+ */
128
+ TCGReg scratch = parm->tmp[1];
129
130
- for (unsigned j = 0; j < i; ++j) {
131
- if (dst == mov[j].src) {
132
- /*
133
- * Conflict.
134
- * Copy the source to a temporary, recurse for the
135
- * remaining moves, perform the extension from our
136
- * scratch on the way out.
137
- */
138
- TCGReg scratch = tmp[--ntmp];
139
- tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
140
- mov[i].src = scratch;
141
-
142
- tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
143
- tcg_out_movext1(s, &mov[i]);
144
- return;
145
- }
146
+ tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
147
+ tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
148
+ tcg_out_movext1_new_src(s, &mov[3], scratch);
149
+ break;
150
}
151
-
152
- /* No conflicts: perform this move and continue. */
153
- tcg_out_movext1(s, &mov[i]);
154
}
155
- /* fall through for the final two moves */
156
157
+ /* No conflicts: perform this move and continue. */
158
+ tcg_out_movext1(s, &mov[3]);
159
+ /* fall through */
264
+
160
+
265
+ /* Trigger TB creation in order to test invalidation. */
161
+ case 3:
266
+ fn(0);
162
+ tcg_out_movext3(s, mov, mov + 1, mov + 2,
267
+
163
+ parm->ntmp ? parm->tmp[0] : -1);
268
+ if (mprotect(page_base, page_size, PROT_NONE) < 0) {
164
+ break;
269
+ handle_err("mprotect");
165
case 2:
270
+ }
166
- tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
271
+
167
- return;
272
+ /* Trigger SEGV and check that handle_segv() ran. */
168
+ tcg_out_movext2(s, mov, mov + 1,
273
+ current_noexec_test = test;
169
+ parm->ntmp ? parm->tmp[0] : -1);
274
+ fn(0);
170
+ break;
275
+ assert(current_noexec_test == NULL);
171
case 1:
276
+}
172
tcg_out_movext1(s, mov);
277
+
173
- return;
278
+static int test_noexec(struct noexec_test *tests, size_t n_tests)
174
- case 0:
279
+{
175
+ break;
280
+ struct sigaction act;
176
+ default:
281
+ size_t i;
177
g_assert_not_reached();
282
+
178
}
283
+ memset(&act, 0, sizeof(act));
179
}
284
+ act.sa_sigaction = handle_segv;
180
@@ -XXX,XX +XXX,XX @@ static void tcg_out_helper_load_slots(TCGContext *s,
285
+ act.sa_flags = SA_SIGINFO;
181
for (i = 0; i < nmov; ++i) {
286
+ if (sigaction(SIGSEGV, &act, NULL) < 0) {
182
mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
287
+ handle_err("sigaction");
183
}
288
+ }
184
- tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
289
+
185
+ tcg_out_helper_load_regs(s, nmov, mov, parm);
290
+ page_size = getpagesize();
186
}
291
+ page_base = mmap(NULL, 2 * page_size,
187
292
+ PROT_READ | PROT_WRITE | PROT_EXEC,
188
static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
293
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
294
+ if (page_base == MAP_FAILED) {
295
+ handle_err("mmap");
296
+ }
297
+ page_base += page_size;
298
+
299
+ for (i = 0; i < n_tests; i++) {
300
+ struct noexec_test *test = &tests[i];
301
+
302
+ printf("[ RUN ] %s\n", test->name);
303
+ test_noexec_1(test);
304
+ printf("[ OK ]\n");
305
+ }
306
+
307
+ printf("[ PASSED ]\n");
308
+ return EXIT_SUCCESS;
309
+}
310
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
311
index XXXXXXX..XXXXXXX 100644
312
--- a/tests/tcg/s390x/Makefile.target
313
+++ b/tests/tcg/s390x/Makefile.target
314
@@ -XXX,XX +XXX,XX @@ TESTS+=shift
315
TESTS+=trap
316
TESTS+=signals-s390x
317
TESTS+=branch-relative-long
318
+TESTS+=noexec
319
320
Z14_TESTS=vfminmax
321
vfminmax: LDFLAGS+=-lm
322
--
189
--
323
2.34.1
190
2.34.1
New patch
1
Now that tcg_out_helper_load_regs is not recursive, we can
2
merge it into its only caller, tcg_out_helper_load_slots.
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/tcg.c | 89 +++++++++++++++++++++++++------------------------------
8
1 file changed, 41 insertions(+), 48 deletions(-)
9
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg.c
13
+++ b/tcg/tcg.c
14
@@ -XXX,XX +XXX,XX @@ static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
15
return ofs;
16
}
17
18
-static void tcg_out_helper_load_regs(TCGContext *s,
19
- unsigned nmov, TCGMovExtend *mov,
20
- const TCGLdstHelperParam *parm)
21
+static void tcg_out_helper_load_slots(TCGContext *s,
22
+ unsigned nmov, TCGMovExtend *mov,
23
+ const TCGLdstHelperParam *parm)
24
{
25
+ unsigned i;
26
TCGReg dst3;
27
28
+ /*
29
+ * Start from the end, storing to the stack first.
30
+ * This frees those registers, so we need not consider overlap.
31
+ */
32
+ for (i = nmov; i-- > 0; ) {
33
+ unsigned slot = mov[i].dst;
34
+
35
+ if (arg_slot_reg_p(slot)) {
36
+ goto found_reg;
37
+ }
38
+
39
+ TCGReg src = mov[i].src;
40
+ TCGType dst_type = mov[i].dst_type;
41
+ MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
42
+
43
+ /* The argument is going onto the stack; extend into scratch. */
44
+ if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
45
+ tcg_debug_assert(parm->ntmp != 0);
46
+ mov[i].dst = src = parm->tmp[0];
47
+ tcg_out_movext1(s, &mov[i]);
48
+ }
49
+
50
+ tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
51
+ tcg_out_helper_stk_ofs(dst_type, slot));
52
+ }
53
+ return;
54
+
55
+ found_reg:
56
+ /*
57
+ * The remaining arguments are in registers.
58
+ * Convert slot numbers to argument registers.
59
+ */
60
+ nmov = i + 1;
61
+ for (i = 0; i < nmov; ++i) {
62
+ mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
63
+ }
64
+
65
switch (nmov) {
66
case 4:
67
/* The backend must have provided enough temps for the worst case. */
68
@@ -XXX,XX +XXX,XX @@ static void tcg_out_helper_load_regs(TCGContext *s,
69
}
70
}
71
72
-static void tcg_out_helper_load_slots(TCGContext *s,
73
- unsigned nmov, TCGMovExtend *mov,
74
- const TCGLdstHelperParam *parm)
75
-{
76
- unsigned i;
77
-
78
- /*
79
- * Start from the end, storing to the stack first.
80
- * This frees those registers, so we need not consider overlap.
81
- */
82
- for (i = nmov; i-- > 0; ) {
83
- unsigned slot = mov[i].dst;
84
-
85
- if (arg_slot_reg_p(slot)) {
86
- goto found_reg;
87
- }
88
-
89
- TCGReg src = mov[i].src;
90
- TCGType dst_type = mov[i].dst_type;
91
- MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
92
-
93
- /* The argument is going onto the stack; extend into scratch. */
94
- if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
95
- tcg_debug_assert(parm->ntmp != 0);
96
- mov[i].dst = src = parm->tmp[0];
97
- tcg_out_movext1(s, &mov[i]);
98
- }
99
-
100
- tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
101
- tcg_out_helper_stk_ofs(dst_type, slot));
102
- }
103
- return;
104
-
105
- found_reg:
106
- /*
107
- * The remaining arguments are in registers.
108
- * Convert slot numbers to argument registers.
109
- */
110
- nmov = i + 1;
111
- for (i = 0; i < nmov; ++i) {
112
- mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
113
- }
114
- tcg_out_helper_load_regs(s, nmov, mov, parm);
115
-}
116
-
117
static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
118
TCGType type, tcg_target_long imm,
119
const TCGLdstHelperParam *parm)
120
--
121
2.34.1
diff view generated by jsdifflib
1
From: Ilya Leoshkevich <iii@linux.ibm.com>
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
3
Right now the translator stops right *after* the end of a page, which
4
breaks reporting of fault locations when the last instruction of a
5
multi-insn translation block crosses a page boundary.
6
7
An implementation, like the one arm and s390x have, would require an
8
i386 length disassembler, which is burdensome to maintain. Another
9
alternative would be to single-step at the end of a guest page, but
10
this may come with a performance impact.
11
12
Fix by snapshotting disassembly state and restoring it after we figure
13
out we crossed a page boundary. This includes rolling back cc_op
14
updates and emitted ops.
15
16
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
17
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
18
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1143
19
Message-Id: <20220817150506.592862-4-iii@linux.ibm.com>
20
[rth: Simplify end-of-insn cross-page checks.]
21
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
22
---
3
---
23
target/i386/tcg/translate.c | 64 ++++++++++++++++-----------
4
tcg/tcg.c | 196 +++++++++++++++++++++++++++++++++++++++++++++---------
24
tests/tcg/x86_64/noexec.c | 75 ++++++++++++++++++++++++++++++++
5
1 file changed, 163 insertions(+), 33 deletions(-)
25
tests/tcg/x86_64/Makefile.target | 3 +-
26
3 files changed, 116 insertions(+), 26 deletions(-)
27
create mode 100644 tests/tcg/x86_64/noexec.c
28
6
29
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
7
diff --git a/tcg/tcg.c b/tcg/tcg.c
30
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
31
--- a/target/i386/tcg/translate.c
9
--- a/tcg/tcg.c
32
+++ b/target/i386/tcg/translate.c
10
+++ b/tcg/tcg.c
33
@@ -XXX,XX +XXX,XX @@ typedef struct DisasContext {
11
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
34
TCGv_i64 tmp1_i64;
12
[MO_UQ] = helper_ldq_mmu,
35
13
#if TCG_TARGET_REG_BITS == 64
36
sigjmp_buf jmpbuf;
14
[MO_SL] = helper_ldsl_mmu,
37
+ TCGOp *prev_insn_end;
15
+ [MO_128] = helper_ld16_mmu,
38
} DisasContext;
16
#endif
39
17
};
40
/* The environment in which user-only runs is constrained. */
18
41
@@ -XXX,XX +XXX,XX @@ static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes)
19
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
20
[MO_16] = helper_stw_mmu,
21
[MO_32] = helper_stl_mmu,
22
[MO_64] = helper_stq_mmu,
23
+#if TCG_TARGET_REG_BITS == 64
24
+ [MO_128] = helper_st16_mmu,
25
+#endif
26
};
27
28
TCGContext tcg_init_ctx;
29
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld64_mmu = {
30
| dh_typemask(ptr, 4) /* uintptr_t ra */
31
};
32
33
+static TCGHelperInfo info_helper_ld128_mmu = {
34
+ .flags = TCG_CALL_NO_WG,
35
+ .typemask = dh_typemask(i128, 0) /* return Int128 */
36
+ | dh_typemask(env, 1)
37
+ | dh_typemask(tl, 2) /* target_ulong addr */
38
+ | dh_typemask(i32, 3) /* unsigned oi */
39
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
40
+};
41
+
42
static TCGHelperInfo info_helper_st32_mmu = {
43
.flags = TCG_CALL_NO_WG,
44
.typemask = dh_typemask(void, 0)
45
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st64_mmu = {
46
| dh_typemask(ptr, 5) /* uintptr_t ra */
47
};
48
49
+static TCGHelperInfo info_helper_st128_mmu = {
50
+ .flags = TCG_CALL_NO_WG,
51
+ .typemask = dh_typemask(void, 0)
52
+ | dh_typemask(env, 1)
53
+ | dh_typemask(tl, 2) /* target_ulong addr */
54
+ | dh_typemask(i128, 3) /* Int128 data */
55
+ | dh_typemask(i32, 4) /* unsigned oi */
56
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
57
+};
58
+
59
#ifdef CONFIG_TCG_INTERPRETER
60
static ffi_type *typecode_to_ffi(int argmask)
42
{
61
{
43
uint64_t pc = s->pc;
62
@@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus)
44
63
45
+ /* This is a subsequent insn that crosses a page boundary. */
64
init_call_layout(&info_helper_ld32_mmu);
46
+ if (s->base.num_insns > 1 &&
65
init_call_layout(&info_helper_ld64_mmu);
47
+ !is_same_page(&s->base, s->pc + num_bytes - 1)) {
66
+ init_call_layout(&info_helper_ld128_mmu);
48
+ siglongjmp(s->jmpbuf, 2);
67
init_call_layout(&info_helper_st32_mmu);
68
init_call_layout(&info_helper_st64_mmu);
69
+ init_call_layout(&info_helper_st128_mmu);
70
71
#ifdef CONFIG_TCG_INTERPRETER
72
init_ffi_layouts();
73
@@ -XXX,XX +XXX,XX @@ static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
74
TCGType dst_type, TCGType src_type,
75
TCGReg lo, TCGReg hi)
76
{
77
+ MemOp reg_mo;
78
+
79
if (dst_type <= TCG_TYPE_REG) {
80
MemOp src_ext;
81
82
@@ -XXX,XX +XXX,XX @@ static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
83
return 1;
84
}
85
86
- assert(TCG_TARGET_REG_BITS == 32);
87
+ if (TCG_TARGET_REG_BITS == 32) {
88
+ assert(dst_type == TCG_TYPE_I64);
89
+ reg_mo = MO_32;
90
+ } else {
91
+ assert(dst_type == TCG_TYPE_I128);
92
+ reg_mo = MO_64;
49
+ }
93
+ }
50
+
94
51
s->pc += num_bytes;
95
mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
52
if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) {
96
mov[0].src = lo;
53
/* If the instruction's 16th byte is on a different page than the 1st, a
97
- mov[0].dst_type = TCG_TYPE_I32;
54
@@ -XXX,XX +XXX,XX @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
98
- mov[0].src_type = TCG_TYPE_I32;
55
int modrm, reg, rm, mod, op, opreg, val;
99
- mov[0].src_ext = MO_32;
56
target_ulong next_eip, tval;
100
+ mov[0].dst_type = TCG_TYPE_REG;
57
target_ulong pc_start = s->base.pc_next;
101
+ mov[0].src_type = TCG_TYPE_REG;
58
+ bool orig_cc_op_dirty = s->cc_op_dirty;
102
+ mov[0].src_ext = reg_mo;
59
+ CCOp orig_cc_op = s->cc_op;
103
60
104
mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
61
s->pc_start = s->pc = pc_start;
105
mov[1].src = hi;
62
s->override = -1;
106
- mov[1].dst_type = TCG_TYPE_I32;
63
@@ -XXX,XX +XXX,XX @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
107
- mov[1].src_type = TCG_TYPE_I32;
64
s->rip_offset = 0; /* for relative ip address */
108
- mov[1].src_ext = MO_32;
65
s->vex_l = 0;
109
+ mov[1].dst_type = TCG_TYPE_REG;
66
s->vex_v = 0;
110
+ mov[1].src_type = TCG_TYPE_REG;
67
- if (sigsetjmp(s->jmpbuf, 0) != 0) {
111
+ mov[1].src_ext = reg_mo;
68
+ switch (sigsetjmp(s->jmpbuf, 0)) {
112
69
+ case 0:
113
return 2;
70
+ break;
114
}
71
+ case 1:
115
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
72
gen_exception_gpf(s);
116
case MO_64:
73
return s->pc;
117
info = &info_helper_ld64_mmu;
74
+ case 2:
118
break;
75
+ /* Restore state that may affect the next instruction. */
119
+ case MO_128:
76
+ s->cc_op_dirty = orig_cc_op_dirty;
120
+ info = &info_helper_ld128_mmu;
77
+ s->cc_op = orig_cc_op;
121
+ break;
78
+ s->base.num_insns--;
122
default:
79
+ tcg_remove_ops_after(s->prev_insn_end);
123
g_assert_not_reached();
80
+ s->base.is_jmp = DISAS_TOO_MANY;
124
}
81
+ return pc_start;
125
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
126
127
tcg_out_helper_load_slots(s, nmov, mov, parm);
128
129
- /* No special attention for 32 and 64-bit return values. */
130
- tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
131
+ switch (info->out_kind) {
132
+ case TCG_CALL_RET_NORMAL:
133
+ case TCG_CALL_RET_BY_VEC:
134
+ break;
135
+ case TCG_CALL_RET_BY_REF:
136
+ /*
137
+ * The return reference is in the first argument slot.
138
+ * We need memory in which to return: re-use the top of stack.
139
+ */
140
+ {
141
+ int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
142
+
143
+ if (arg_slot_reg_p(0)) {
144
+ tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
145
+ TCG_REG_CALL_STACK, ofs_slot0);
146
+ } else {
147
+ tcg_debug_assert(parm->ntmp != 0);
148
+ tcg_out_addi_ptr(s, parm->tmp[0],
149
+ TCG_REG_CALL_STACK, ofs_slot0);
150
+ tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
151
+ TCG_REG_CALL_STACK, ofs_slot0);
152
+ }
153
+ }
154
+ break;
155
+ default:
156
+ g_assert_not_reached();
157
+ }
158
159
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
160
}
161
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
162
bool load_sign,
163
const TCGLdstHelperParam *parm)
164
{
165
+ MemOp mop = get_memop(ldst->oi);
166
TCGMovExtend mov[2];
167
+ int ofs_slot0;
168
169
- if (ldst->type <= TCG_TYPE_REG) {
170
- MemOp mop = get_memop(ldst->oi);
171
+ switch (ldst->type) {
172
+ case TCG_TYPE_I64:
173
+ if (TCG_TARGET_REG_BITS == 32) {
174
+ break;
175
+ }
176
+ /* fall through */
177
178
+ case TCG_TYPE_I32:
179
mov[0].dst = ldst->datalo_reg;
180
mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
181
mov[0].dst_type = ldst->type;
182
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
183
mov[0].src_ext = mop & MO_SSIZE;
184
}
185
tcg_out_movext1(s, mov);
186
- } else {
187
- assert(TCG_TARGET_REG_BITS == 32);
188
+ return;
189
190
- mov[0].dst = ldst->datalo_reg;
191
- mov[0].src =
192
- tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
193
- mov[0].dst_type = TCG_TYPE_I32;
194
- mov[0].src_type = TCG_TYPE_I32;
195
- mov[0].src_ext = MO_32;
196
+ case TCG_TYPE_I128:
197
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
198
+ ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
199
+ switch (TCG_TARGET_CALL_RET_I128) {
200
+ case TCG_CALL_RET_NORMAL:
201
+ break;
202
+ case TCG_CALL_RET_BY_VEC:
203
+ tcg_out_st(s, TCG_TYPE_V128,
204
+ tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
205
+ TCG_REG_CALL_STACK, ofs_slot0);
206
+ /* fall through */
207
+ case TCG_CALL_RET_BY_REF:
208
+ tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
209
+ TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
210
+ tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
211
+ TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
212
+ return;
213
+ default:
214
+ g_assert_not_reached();
215
+ }
216
+ break;
217
218
- mov[1].dst = ldst->datahi_reg;
219
- mov[1].src =
220
- tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
221
- mov[1].dst_type = TCG_TYPE_REG;
222
- mov[1].src_type = TCG_TYPE_REG;
223
- mov[1].src_ext = MO_32;
224
-
225
- tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
82
+ default:
226
+ default:
83
+ g_assert_not_reached();
227
+ g_assert_not_reached();
84
}
228
}
85
229
+
86
prefixes = 0;
230
+ mov[0].dst = ldst->datalo_reg;
87
@@ -XXX,XX +XXX,XX @@ static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
231
+ mov[0].src =
88
{
232
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
89
DisasContext *dc = container_of(dcbase, DisasContext, base);
233
+ mov[0].dst_type = TCG_TYPE_I32;
90
234
+ mov[0].src_type = TCG_TYPE_I32;
91
+ dc->prev_insn_end = tcg_last_op();
235
+ mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
92
tcg_gen_insn_start(dc->base.pc_next, dc->cc_op);
236
+
237
+ mov[1].dst = ldst->datahi_reg;
238
+ mov[1].src =
239
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
240
+ mov[1].dst_type = TCG_TYPE_REG;
241
+ mov[1].src_type = TCG_TYPE_REG;
242
+ mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
243
+
244
+ tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
93
}
245
}
94
246
95
@@ -XXX,XX +XXX,XX @@ static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
247
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
96
#endif
248
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
97
249
info = &info_helper_st64_mmu;
98
pc_next = disas_insn(dc, cpu);
250
data_type = TCG_TYPE_I64;
99
-
251
break;
100
- if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
252
+ case MO_128:
101
- /* if single step mode, we generate only one instruction and
253
+ info = &info_helper_st128_mmu;
102
- generate an exception */
254
+ data_type = TCG_TYPE_I128;
103
- /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
255
+ break;
104
- the flag and abort the translation to give the irqs a
256
default:
105
- chance to happen */
257
g_assert_not_reached();
106
- dc->base.is_jmp = DISAS_TOO_MANY;
258
}
107
- } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT)
259
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
108
- && ((pc_next & TARGET_PAGE_MASK)
260
109
- != ((pc_next + TARGET_MAX_INSN_SIZE - 1)
261
/* Handle data argument. */
110
- & TARGET_PAGE_MASK)
262
loc = &info->in[next_arg];
111
- || (pc_next & ~TARGET_PAGE_MASK) == 0)) {
263
- n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
112
- /* Do not cross the boundary of the pages in icount mode,
264
- ldst->datalo_reg, ldst->datahi_reg);
113
- it can cause an exception. Do it only when boundary is
265
- next_arg += n;
114
- crossed by the first instruction in the block.
266
- nmov += n;
115
- If current instruction already crossed the bound - it's ok,
267
- tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
116
- because an exception hasn't stopped this code.
268
+ switch (loc->kind) {
117
- */
269
+ case TCG_CALL_ARG_NORMAL:
118
- dc->base.is_jmp = DISAS_TOO_MANY;
270
+ case TCG_CALL_ARG_EXTEND_U:
119
- } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) {
271
+ case TCG_CALL_ARG_EXTEND_S:
120
- dc->base.is_jmp = DISAS_TOO_MANY;
272
+ n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
121
- }
273
+ ldst->datalo_reg, ldst->datahi_reg);
122
-
274
+ next_arg += n;
123
dc->base.pc_next = pc_next;
275
+ nmov += n;
124
+
276
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
125
+ if (dc->base.is_jmp == DISAS_NEXT) {
277
+ break;
126
+ if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) {
278
+
127
+ /*
279
+ case TCG_CALL_ARG_BY_REF:
128
+ * If single step mode, we generate only one instruction and
280
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
129
+ * generate an exception.
281
+ tcg_debug_assert(data_type == TCG_TYPE_I128);
130
+ * If irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear
282
+ tcg_out_st(s, TCG_TYPE_I64,
131
+ * the flag and abort the translation to give the irqs a
283
+ HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
132
+ * chance to happen.
284
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
133
+ */
285
+ tcg_out_st(s, TCG_TYPE_I64,
134
+ dc->base.is_jmp = DISAS_TOO_MANY;
286
+ HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
135
+ } else if (!is_same_page(&dc->base, pc_next)) {
287
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
136
+ dc->base.is_jmp = DISAS_TOO_MANY;
288
+
289
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
290
+
291
+ if (arg_slot_reg_p(loc->arg_slot)) {
292
+ tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
293
+ TCG_REG_CALL_STACK,
294
+ arg_slot_stk_ofs(loc->ref_slot));
295
+ } else {
296
+ tcg_debug_assert(parm->ntmp != 0);
297
+ tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
298
+ arg_slot_stk_ofs(loc->ref_slot));
299
+ tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
300
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
137
+ }
301
+ }
302
+ next_arg += 2;
303
+ break;
304
+
305
+ default:
306
+ g_assert_not_reached();
138
+ }
307
+ }
308
309
- tcg_out_helper_load_slots(s, nmov, mov, parm);
310
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
139
}
311
}
140
312
141
static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
142
diff --git a/tests/tcg/x86_64/noexec.c b/tests/tcg/x86_64/noexec.c
143
new file mode 100644
144
index XXXXXXX..XXXXXXX
145
--- /dev/null
146
+++ b/tests/tcg/x86_64/noexec.c
147
@@ -XXX,XX +XXX,XX @@
148
+#include "../multiarch/noexec.c.inc"
149
+
150
+static void *arch_mcontext_pc(const mcontext_t *ctx)
151
+{
152
+ return (void *)ctx->gregs[REG_RIP];
153
+}
154
+
155
+int arch_mcontext_arg(const mcontext_t *ctx)
156
+{
157
+ return ctx->gregs[REG_RDI];
158
+}
159
+
160
+static void arch_flush(void *p, int len)
161
+{
162
+}
163
+
164
+extern char noexec_1[];
165
+extern char noexec_2[];
166
+extern char noexec_end[];
167
+
168
+asm("noexec_1:\n"
169
+ " movq $1,%rdi\n" /* %rdi is 0 on entry, set 1. */
170
+ "noexec_2:\n"
171
+ " movq $2,%rdi\n" /* %rdi is 0/1; set 2. */
172
+ " ret\n"
173
+ "noexec_end:");
174
+
175
+int main(void)
176
+{
177
+ struct noexec_test noexec_tests[] = {
178
+ {
179
+ .name = "fallthrough",
180
+ .test_code = noexec_1,
181
+ .test_len = noexec_end - noexec_1,
182
+ .page_ofs = noexec_1 - noexec_2,
183
+ .entry_ofs = noexec_1 - noexec_2,
184
+ .expected_si_ofs = 0,
185
+ .expected_pc_ofs = 0,
186
+ .expected_arg = 1,
187
+ },
188
+ {
189
+ .name = "jump",
190
+ .test_code = noexec_1,
191
+ .test_len = noexec_end - noexec_1,
192
+ .page_ofs = noexec_1 - noexec_2,
193
+ .entry_ofs = 0,
194
+ .expected_si_ofs = 0,
195
+ .expected_pc_ofs = 0,
196
+ .expected_arg = 0,
197
+ },
198
+ {
199
+ .name = "fallthrough [cross]",
200
+ .test_code = noexec_1,
201
+ .test_len = noexec_end - noexec_1,
202
+ .page_ofs = noexec_1 - noexec_2 - 2,
203
+ .entry_ofs = noexec_1 - noexec_2 - 2,
204
+ .expected_si_ofs = 0,
205
+ .expected_pc_ofs = -2,
206
+ .expected_arg = 1,
207
+ },
208
+ {
209
+ .name = "jump [cross]",
210
+ .test_code = noexec_1,
211
+ .test_len = noexec_end - noexec_1,
212
+ .page_ofs = noexec_1 - noexec_2 - 2,
213
+ .entry_ofs = -2,
214
+ .expected_si_ofs = 0,
215
+ .expected_pc_ofs = -2,
216
+ .expected_arg = 0,
217
+ },
218
+ };
219
+
220
+ return test_noexec(noexec_tests,
221
+ sizeof(noexec_tests) / sizeof(noexec_tests[0]));
222
+}
223
diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
224
index XXXXXXX..XXXXXXX 100644
225
--- a/tests/tcg/x86_64/Makefile.target
226
+++ b/tests/tcg/x86_64/Makefile.target
227
@@ -XXX,XX +XXX,XX @@ include $(SRC_PATH)/tests/tcg/i386/Makefile.target
228
229
ifeq ($(filter %-linux-user, $(TARGET)),$(TARGET))
230
X86_64_TESTS += vsyscall
231
+X86_64_TESTS += noexec
232
TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
233
else
234
TESTS=$(MULTIARCH_TESTS)
235
@@ -XXX,XX +XXX,XX @@ test-x86_64: LDFLAGS+=-lm -lc
236
test-x86_64: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h
237
    $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
238
239
-vsyscall: $(SRC_PATH)/tests/tcg/x86_64/vsyscall.c
240
+%: $(SRC_PATH)/tests/tcg/x86_64/%.c
241
    $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
242
--
313
--
243
2.34.1
314
2.34.1
diff view generated by jsdifflib
New patch
1
Examine MemOp for atomicity and alignment, adjusting alignment
2
as required to implement atomicity on the host.
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/tcg.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
8
1 file changed, 95 insertions(+)
9
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg.c
13
+++ b/tcg/tcg.c
14
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
15
#endif
16
};
17
18
+typedef struct {
19
+ MemOp atom; /* lg2 bits of atomicity required */
20
+ MemOp align; /* lg2 bits of alignment to use */
21
+} TCGAtomAlign;
22
+
23
+static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
24
+ MemOp host_atom, bool allow_two_ops)
25
+ __attribute__((unused));
26
+
27
TCGContext tcg_init_ctx;
28
__thread TCGContext *tcg_ctx;
29
30
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
31
}
32
}
33
34
+/**
35
+ * atom_and_align_for_opc:
36
+ * @s: tcg context
37
+ * @opc: memory operation code
38
+ * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
39
+ * @allow_two_ops: true if we are prepared to issue two operations
40
+ *
41
+ * Return the alignment and atomicity to use for the inline fast path
42
+ * for the given memory operation. The alignment may be larger than
43
+ * that specified in @opc, and the correct alignment will be diagnosed
44
+ * by the slow path helper.
45
+ *
46
+ * If @allow_two_ops, the host is prepared to test for 2x alignment,
47
+ * and issue two loads or stores for subalignment.
48
+ */
49
+static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
50
+ MemOp host_atom, bool allow_two_ops)
51
+{
52
+ MemOp align = get_alignment_bits(opc);
53
+ MemOp size = opc & MO_SIZE;
54
+ MemOp half = size ? size - 1 : 0;
55
+ MemOp atmax;
56
+ MemOp atom;
57
+
58
+ /* When serialized, no further atomicity required. */
59
+ if (s->gen_tb->cflags & CF_PARALLEL) {
60
+ atom = opc & MO_ATOM_MASK;
61
+ } else {
62
+ atom = MO_ATOM_NONE;
63
+ }
64
+
65
+ switch (atom) {
66
+ case MO_ATOM_NONE:
67
+ /* The operation requires no specific atomicity. */
68
+ atmax = MO_8;
69
+ break;
70
+
71
+ case MO_ATOM_IFALIGN:
72
+ atmax = size;
73
+ break;
74
+
75
+ case MO_ATOM_IFALIGN_PAIR:
76
+ atmax = half;
77
+ break;
78
+
79
+ case MO_ATOM_WITHIN16:
80
+ atmax = size;
81
+ if (size == MO_128) {
82
+ /* Misalignment implies !within16, and therefore no atomicity. */
83
+ } else if (host_atom != MO_ATOM_WITHIN16) {
84
+ /* The host does not implement within16, so require alignment. */
85
+ align = MAX(align, size);
86
+ }
87
+ break;
88
+
89
+ case MO_ATOM_WITHIN16_PAIR:
90
+ atmax = size;
91
+ /*
92
+ * Misalignment implies !within16, and therefore half atomicity.
93
+ * Any host prepared for two operations can implement this with
94
+ * half alignment.
95
+ */
96
+ if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
97
+ align = MAX(align, half);
98
+ }
99
+ break;
100
+
101
+ case MO_ATOM_SUBALIGN:
102
+ atmax = size;
103
+ if (host_atom != MO_ATOM_SUBALIGN) {
104
+ /* If unaligned but not odd, there are subobjects up to half. */
105
+ if (allow_two_ops) {
106
+ align = MAX(align, half);
107
+ } else {
108
+ align = MAX(align, size);
109
+ }
110
+ }
111
+ break;
112
+
113
+ default:
114
+ g_assert_not_reached();
115
+ }
116
+
117
+ return (TCGAtomAlign){ .atom = atmax, .align = align };
118
+}
119
+
120
/*
121
* Similarly for qemu_ld/st slow path helpers.
122
* We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
123
--
124
2.34.1
diff view generated by jsdifflib
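The helper above only computes an (atomicity, alignment) pair; the backend patches that follow all consume it the same way: derive an alignment mask from aa.align and send anything below that alignment to the out-of-line slow path. A minimal sketch of that shared pattern (not a drop-in function; s, opc, oi, addrlo and is_ld stand for the usual prepare_host_addr() parameters used in the hunks below):

    /* Sketch of the common fast-path check the backends repeat. */
    TCGAtomAlign aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
    unsigned a_mask = (1u << aa.align) - 1;

    if (a_mask) {
        /* Alignment below aa.align cannot satisfy aa.atom inline:
           record a slow-path label, then emit "test addr, a_mask;
           branch-if-nonzero slow_path" in the host's own encoding. */
        ldst = new_ldst_label(s);
        ldst->is_ld = is_ld;
        ldst->oi = oi;
        ldst->addrlo_reg = addrlo;
    }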
New patch
1
No change to the ultimate load/store routines yet, so some atomicity
2
conditions are not yet honored, but this plumbs the change to alignment
3
through the relevant functions.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 27 +++++++++++++++------------
9
1 file changed, 15 insertions(+), 12 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
int index;
17
int ofs;
18
int seg;
19
+ TCGAtomAlign aa;
20
} HostAddress;
21
22
bool tcg_target_has_memory_bswap(MemOp memop)
23
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
24
{
25
TCGLabelQemuLdst *ldst = NULL;
26
MemOp opc = get_memop(oi);
27
- unsigned a_bits = get_alignment_bits(opc);
28
- unsigned a_mask = (1 << a_bits) - 1;
29
+ unsigned a_mask;
30
+
31
+#ifdef CONFIG_SOFTMMU
32
+ h->index = TCG_REG_L0;
33
+ h->ofs = 0;
34
+ h->seg = 0;
35
+#else
36
+ *h = x86_guest_base;
37
+#endif
38
+ h->base = addrlo;
39
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
40
+ a_mask = (1 << h->aa.align) - 1;
41
42
#ifdef CONFIG_SOFTMMU
43
int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
44
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
45
* copy the address and mask. For lesser alignments, check that we don't
46
* cross pages for the complete access.
47
*/
48
- if (a_bits >= s_bits) {
49
+ if (a_mask >= s_mask) {
50
tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
51
} else {
52
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
53
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
54
/* TLB Hit. */
55
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
56
offsetof(CPUTLBEntry, addend));
57
-
58
- *h = (HostAddress) {
59
- .base = addrlo,
60
- .index = TCG_REG_L0,
61
- };
62
#else
63
- if (a_bits) {
64
+ if (a_mask) {
65
ldst = new_ldst_label(s);
66
67
ldst->is_ld = is_ld;
68
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
69
ldst->label_ptr[0] = s->code_ptr;
70
s->code_ptr += 4;
71
}
72
-
73
- *h = x86_guest_base;
74
- h->base = addrlo;
75
#endif
76
77
return ldst;
78
--
79
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/aarch64/tcg-target.c.inc | 36 ++++++++++++++++++------------------
5
1 file changed, 18 insertions(+), 18 deletions(-)
1
6
7
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/aarch64/tcg-target.c.inc
10
+++ b/tcg/aarch64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ typedef struct {
12
TCGReg base;
13
TCGReg index;
14
TCGType index_ext;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
- unsigned a_mask = (1u << a_bits) - 1;
25
+ unsigned a_mask;
26
+
27
+ h->aa = atom_and_align_for_opc(s, opc,
28
+ have_lse2 ? MO_ATOM_WITHIN16
29
+ : MO_ATOM_IFALIGN,
30
+ false);
31
+ a_mask = (1 << h->aa.align) - 1;
32
33
#ifdef CONFIG_SOFTMMU
34
unsigned s_bits = opc & MO_SIZE;
35
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
36
* bits within the address. For unaligned access, we check that we don't
37
* cross pages using the address of the last byte of the access.
38
*/
39
- if (a_bits >= s_bits) {
40
+ if (a_mask >= s_mask) {
41
x3 = addr_reg;
42
} else {
43
tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
44
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
45
ldst->label_ptr[0] = s->code_ptr;
46
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
47
48
- *h = (HostAddress){
49
- .base = TCG_REG_X1,
50
- .index = addr_reg,
51
- .index_ext = addr_type
52
- };
53
+ h->base = TCG_REG_X1,
54
+ h->index = addr_reg;
55
+ h->index_ext = addr_type;
56
#else
57
if (a_mask) {
58
ldst = new_ldst_label(s);
59
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
60
}
61
62
if (USE_GUEST_BASE) {
63
- *h = (HostAddress){
64
- .base = TCG_REG_GUEST_BASE,
65
- .index = addr_reg,
66
- .index_ext = addr_type
67
- };
68
+ h->base = TCG_REG_GUEST_BASE;
69
+ h->index = addr_reg;
70
+ h->index_ext = addr_type;
71
} else {
72
- *h = (HostAddress){
73
- .base = addr_reg,
74
- .index = TCG_REG_XZR,
75
- .index_ext = TCG_TYPE_I64
76
- };
77
+ h->base = addr_reg;
78
+ h->index = TCG_REG_XZR;
79
+ h->index_ext = TCG_TYPE_I64;
80
}
81
#endif
82
83
--
84
2.34.1
diff view generated by jsdifflib
New patch
1
No change to the ultimate load/store routines yet, so some atomicity
2
conditions are not yet honored, but this plumbs the change to alignment
3
through the relevant functions.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/arm/tcg-target.c.inc | 39 ++++++++++++++++++++++-----------------
9
1 file changed, 22 insertions(+), 17 deletions(-)
10
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/arm/tcg-target.c.inc
14
+++ b/tcg/arm/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
TCGReg base;
17
int index;
18
bool index_scratch;
19
+ TCGAtomAlign aa;
20
} HostAddress;
21
22
bool tcg_target_has_memory_bswap(MemOp memop)
23
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
24
{
25
TCGLabelQemuLdst *ldst = NULL;
26
MemOp opc = get_memop(oi);
27
- MemOp a_bits = get_alignment_bits(opc);
28
- unsigned a_mask = (1 << a_bits) - 1;
29
+ unsigned a_mask;
30
+
31
+#ifdef CONFIG_SOFTMMU
32
+ *h = (HostAddress){
33
+ .cond = COND_AL,
34
+ .base = addrlo,
35
+ .index = TCG_REG_R1,
36
+ .index_scratch = true,
37
+ };
38
+#else
39
+ *h = (HostAddress){
40
+ .cond = COND_AL,
41
+ .base = addrlo,
42
+ .index = guest_base ? TCG_REG_GUEST_BASE : -1,
43
+ .index_scratch = false,
44
+ };
45
+#endif
46
+
47
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
48
+ a_mask = (1 << h->aa.align) - 1;
49
50
#ifdef CONFIG_SOFTMMU
51
int mem_index = get_mmuidx(oi);
52
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
53
if (TARGET_LONG_BITS == 64) {
54
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
55
}
56
-
57
- *h = (HostAddress){
58
- .cond = COND_AL,
59
- .base = addrlo,
60
- .index = TCG_REG_R1,
61
- .index_scratch = true,
62
- };
63
#else
64
if (a_mask) {
65
ldst = new_ldst_label(s);
66
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
67
ldst->addrlo_reg = addrlo;
68
ldst->addrhi_reg = addrhi;
69
70
- /* We are expecting a_bits to max out at 7 */
71
+ /* We are expecting alignment to max out at 7 */
72
tcg_debug_assert(a_mask <= 0xff);
73
/* tst addr, #mask */
74
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
75
}
76
-
77
- *h = (HostAddress){
78
- .cond = COND_AL,
79
- .base = addrlo,
80
- .index = guest_base ? TCG_REG_GUEST_BASE : -1,
81
- .index_scratch = false,
82
- };
83
#endif
84
85
return ldst;
86
--
87
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/loongarch64/tcg-target.c.inc | 6 +++++-
5
1 file changed, 5 insertions(+), 1 deletion(-)
1
6
7
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/loongarch64/tcg-target.c.inc
10
+++ b/tcg/loongarch64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
12
typedef struct {
13
TCGReg base;
14
TCGReg index;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
+ MemOp a_bits;
25
+
26
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
27
+ a_bits = h->aa.align;
28
29
#ifdef CONFIG_SOFTMMU
30
unsigned s_bits = opc & MO_SIZE;
31
--
32
2.34.1
diff view generated by jsdifflib
1
The only user can easily use translator_lduw and
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
adjust the type to signed during the return.
3
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
3
---
9
include/exec/translator.h | 1 -
4
tcg/mips/tcg-target.c.inc | 15 +++++++++------
10
target/i386/tcg/translate.c | 2 +-
5
1 file changed, 9 insertions(+), 6 deletions(-)
11
2 files changed, 1 insertion(+), 2 deletions(-)
12
6
13
diff --git a/include/exec/translator.h b/include/exec/translator.h
7
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
14
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
15
--- a/include/exec/translator.h
9
--- a/tcg/mips/tcg-target.c.inc
16
+++ b/include/exec/translator.h
10
+++ b/tcg/mips/tcg-target.c.inc
17
@@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest);
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
18
12
19
#define FOR_EACH_TRANSLATOR_LD(F) \
13
typedef struct {
20
F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */) \
14
TCGReg base;
21
- F(translator_ldsw, int16_t, cpu_ldsw_code, bswap16) \
15
- MemOp align;
22
F(translator_lduw, uint16_t, cpu_lduw_code, bswap16) \
16
+ TCGAtomAlign aa;
23
F(translator_ldl, uint32_t, cpu_ldl_code, bswap32) \
17
} HostAddress;
24
F(translator_ldq, uint64_t, cpu_ldq_code, bswap64)
18
25
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
19
bool tcg_target_has_memory_bswap(MemOp memop)
26
index XXXXXXX..XXXXXXX 100644
20
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
27
--- a/target/i386/tcg/translate.c
28
+++ b/target/i386/tcg/translate.c
29
@@ -XXX,XX +XXX,XX @@ static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s)
30
31
static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s)
32
{
21
{
33
- return translator_ldsw(env, &s->base, advance_pc(env, s, 2));
22
TCGLabelQemuLdst *ldst = NULL;
34
+ return translator_lduw(env, &s->base, advance_pc(env, s, 2));
23
MemOp opc = get_memop(oi);
24
- unsigned a_bits = get_alignment_bits(opc);
25
+ MemOp a_bits;
26
unsigned s_bits = opc & MO_SIZE;
27
- unsigned a_mask = (1 << a_bits) - 1;
28
+ unsigned a_mask;
29
TCGReg base;
30
31
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
32
+ a_bits = h->aa.align;
33
+ a_mask = (1 << a_bits) - 1;
34
+
35
#ifdef CONFIG_SOFTMMU
36
unsigned s_mask = (1 << s_bits) - 1;
37
int mem_index = get_mmuidx(oi);
38
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
39
#endif
40
41
h->base = base;
42
- h->align = a_bits;
43
return ldst;
35
}
44
}
36
45
37
static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s)
46
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
47
48
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
49
50
- if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
51
+ if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) {
52
tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type);
53
} else {
54
tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type);
55
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
56
57
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
58
59
- if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
60
+ if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) {
61
tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc);
62
} else {
63
tcg_out_qemu_st_unalign(s, datalo, datahi, h.base, opc);
38
--
64
--
39
2.34.1
65
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/ppc/tcg-target.c.inc | 19 ++++++++++++++++++-
5
1 file changed, 18 insertions(+), 1 deletion(-)
1
6
7
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/ppc/tcg-target.c.inc
10
+++ b/tcg/ppc/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
12
typedef struct {
13
TCGReg base;
14
TCGReg index;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
+ MemOp a_bits;
25
+
26
+ /*
27
+ * Book II, Section 1.4, Single-Copy Atomicity, specifies:
28
+ *
29
+ * Before 3.0, "An access that is not atomic is performed as a set of
30
+ * smaller disjoint atomic accesses. In general, the number and alignment
31
+ * of these accesses are implementation-dependent." Thus MO_ATOM_IFALIGN.
32
+ *
33
+ * As of 3.0, "the non-atomic access is performed as described in
34
+ * the corresponding list", which matches MO_ATOM_SUBALIGN.
35
+ */
36
+ h->aa = atom_and_align_for_opc(s, opc,
37
+ have_isa_3_00 ? MO_ATOM_SUBALIGN
38
+ : MO_ATOM_IFALIGN,
39
+ false);
40
+ a_bits = h->aa.align;
41
42
#ifdef CONFIG_SOFTMMU
43
int mem_index = get_mmuidx(oi);
44
--
45
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/riscv/tcg-target.c.inc | 13 ++++++++-----
5
1 file changed, 8 insertions(+), 5 deletions(-)
1
6
7
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/riscv/tcg-target.c.inc
10
+++ b/tcg/riscv/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
12
{
13
TCGLabelQemuLdst *ldst = NULL;
14
MemOp opc = get_memop(oi);
15
- unsigned a_bits = get_alignment_bits(opc);
16
- unsigned a_mask = (1u << a_bits) - 1;
17
+ TCGAtomAlign aa;
18
+ unsigned a_mask;
19
+
20
+ aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
21
+ a_mask = (1u << aa.align) - 1;
22
23
#ifdef CONFIG_SOFTMMU
24
unsigned s_bits = opc & MO_SIZE;
25
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
26
* cross pages using the address of the last byte of the access.
27
*/
28
addr_adj = addr_reg;
29
- if (a_bits < s_bits) {
30
+ if (a_mask < s_mask) {
31
addr_adj = TCG_REG_TMP0;
32
tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
33
addr_adj, addr_reg, s_mask - a_mask);
34
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
35
ldst->oi = oi;
36
ldst->addrlo_reg = addr_reg;
37
38
- /* We are expecting a_bits max 7, so we can always use andi. */
39
- tcg_debug_assert(a_bits < 12);
40
+ /* We are expecting alignment max 7, so we can always use andi. */
41
+ tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
42
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
43
44
ldst->label_ptr[0] = s->code_ptr;
45
--
46
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/s390x/tcg-target.c.inc | 11 +++++++----
5
1 file changed, 7 insertions(+), 4 deletions(-)
1
6
7
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/s390x/tcg-target.c.inc
10
+++ b/tcg/s390x/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ typedef struct {
12
TCGReg base;
13
TCGReg index;
14
int disp;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
- unsigned a_mask = (1u << a_bits) - 1;
25
+ unsigned a_mask;
26
+
27
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
28
+ a_mask = (1 << h->aa.align) - 1;
29
30
#ifdef CONFIG_SOFTMMU
31
unsigned s_bits = opc & MO_SIZE;
32
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
33
* bits within the address. For unaligned access, we check that we don't
34
* cross pages using the address of the last byte of the access.
35
*/
36
- a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
37
+ a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
38
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
39
if (a_off == 0) {
40
tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
41
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
42
ldst->addrlo_reg = addr_reg;
43
44
/* We are expecting a_bits to max out at 7, much lower than TMLL. */
45
- tcg_debug_assert(a_bits < 16);
46
+ tcg_debug_assert(a_mask <= 0xffff);
47
tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
48
49
tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
50
--
51
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/sparc64/tcg-target.c.inc | 21 ++++++++++++---------
5
1 file changed, 12 insertions(+), 9 deletions(-)
1
6
7
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/sparc64/tcg-target.c.inc
10
+++ b/tcg/sparc64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
12
typedef struct {
13
TCGReg base;
14
TCGReg index;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
- unsigned s_bits = opc & MO_SIZE;
25
+ MemOp s_bits = opc & MO_SIZE;
26
unsigned a_mask;
27
28
/* We don't support unaligned accesses. */
29
- a_bits = MAX(a_bits, s_bits);
30
- a_mask = (1u << a_bits) - 1;
31
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
32
+ h->aa.align = MAX(h->aa.align, s_bits);
33
+ a_mask = (1u << h->aa.align) - 1;
34
35
#ifdef CONFIG_SOFTMMU
36
int mem_index = get_mmuidx(oi);
37
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
38
cc = TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC;
39
tcg_out_bpcc0(s, COND_NE, BPCC_PN | cc, 0);
40
#else
41
- if (a_bits != s_bits) {
42
- /*
43
- * Test for at least natural alignment, and defer
44
- * everything else to the helper functions.
45
- */
46
+ /*
47
+ * If the size equals the required alignment, we can skip the test
48
+ * and allow host SIGBUS to deliver SIGBUS to the guest.
49
+ * Otherwise, test for at least natural alignment and defer
50
+ * everything else to the helper functions.
51
+ */
52
+ if (s_bits != get_alignment_bits(opc)) {
53
tcg_debug_assert(check_fit_tl(a_mask, 13));
54
tcg_out_arithi(s, TCG_REG_G0, addr_reg, a_mask, ARITH_ANDCC);
55
56
--
57
2.34.1
diff view generated by jsdifflib
New patch
1
Use the fpu to perform 64-bit loads and stores.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/i386/tcg-target.c.inc | 44 +++++++++++++++++++++++++++++++++------
7
1 file changed, 38 insertions(+), 6 deletions(-)
8
9
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/i386/tcg-target.c.inc
12
+++ b/tcg/i386/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
14
#define OPC_GRP5 (0xff)
15
#define OPC_GRP14 (0x73 | P_EXT | P_DATA16)
16
17
+#define OPC_ESCDF (0xdf)
18
+#define ESCDF_FILD_m64 5
19
+#define ESCDF_FISTP_m64 7
20
+
21
/* Group 1 opcode extensions for 0x80-0x83.
22
These are also used as modifiers for OPC_ARITH. */
23
#define ARITH_ADD 0
24
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
25
datalo = datahi;
26
datahi = t;
27
}
28
- if (h.base == datalo || h.index == datalo) {
29
+ if (h.aa.atom == MO_64) {
30
+ /*
31
+ * Atomicity requires that we use a single 8-byte load.
32
+ * For simplicity and code size, always use the FPU for this.
33
+ * Similar insns using SSE/AVX are merely larger.
34
+ * Load from memory in one go, then store back to the stack,
35
+ * from whence we can load into the correct integer regs.
36
+ */
37
+ tcg_out_modrm_sib_offset(s, OPC_ESCDF + h.seg, ESCDF_FILD_m64,
38
+ h.base, h.index, 0, h.ofs);
39
+ tcg_out_modrm_offset(s, OPC_ESCDF, ESCDF_FISTP_m64, TCG_REG_ESP, 0);
40
+ tcg_out_modrm_offset(s, movop, datalo, TCG_REG_ESP, 0);
41
+ tcg_out_modrm_offset(s, movop, datahi, TCG_REG_ESP, 4);
42
+ } else if (h.base == datalo || h.index == datalo) {
43
tcg_out_modrm_sib_offset(s, OPC_LEA, datahi,
44
h.base, h.index, 0, h.ofs);
45
tcg_out_modrm_offset(s, movop + h.seg, datalo, datahi, 0);
46
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
47
if (TCG_TARGET_REG_BITS == 64) {
48
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
49
h.base, h.index, 0, h.ofs);
50
+ break;
51
+ }
52
+ if (use_movbe) {
53
+ TCGReg t = datalo;
54
+ datalo = datahi;
55
+ datahi = t;
56
+ }
57
+ if (h.aa.atom == MO_64) {
58
+ /*
59
+ * Atomicity requires that we use one 8-byte store.
60
+ * For simplicity, and code size, always use the FPU for this.
61
+ * Similar insns using SSE/AVX are merely larger.
62
+ * Assemble the 8-byte quantity in required endianness
63
+ * on the stack, load to coproc unit, and store.
64
+ */
65
+ tcg_out_modrm_offset(s, movop, datalo, TCG_REG_ESP, 0);
66
+ tcg_out_modrm_offset(s, movop, datahi, TCG_REG_ESP, 4);
67
+ tcg_out_modrm_offset(s, OPC_ESCDF, ESCDF_FILD_m64, TCG_REG_ESP, 0);
68
+ tcg_out_modrm_sib_offset(s, OPC_ESCDF + h.seg, ESCDF_FISTP_m64,
69
+ h.base, h.index, 0, h.ofs);
70
} else {
71
- if (use_movbe) {
72
- TCGReg t = datalo;
73
- datalo = datahi;
74
- datahi = t;
75
- }
76
tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
77
h.base, h.index, 0, h.ofs);
78
tcg_out_modrm_sib_offset(s, movop + h.seg, datahi,
79
--
80
2.34.1
diff view generated by jsdifflib
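For readers without the x87 opcode map handy: the patch's OPC_ESCDF with reg fields 5 and 7 are FILD m64int and FISTP m64int, so the new MO_64-atomic path boils down to roughly the following sequence (a sketch only: AT&T syntax, illustrative register names, movbe variants ignored):

    /*
     * Load side:
     *     fildll  ofs(%base,%index)   ; one 8-byte integer load via the x87 unit
     *     fistpll (%esp)              ; spill to the scratch slot, pop the x87 stack
     *     movl    (%esp), %datalo     ; reload the halves into integer registers
     *     movl    4(%esp), %datahi
     *
     * Store side: the same dance in reverse, assembling the value at (%esp)
     * and letting fistpll perform the single 8-byte store to guest memory.
     */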
1
These will be useful in properly ending the TB.
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
3
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
4
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
5
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
target/riscv/translate.c | 10 +++++++++-
4
tcg/i386/tcg-target.h | 3 +-
9
1 file changed, 9 insertions(+), 1 deletion(-)
5
tcg/i386/tcg-target.c.inc | 181 +++++++++++++++++++++++++++++++++++++-
6
2 files changed, 180 insertions(+), 4 deletions(-)
10
7
11
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
8
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
13
--- a/target/riscv/translate.c
10
--- a/tcg/i386/tcg-target.h
14
+++ b/target/riscv/translate.c
11
+++ b/tcg/i386/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
12
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
16
/* Include decoders for factored-out extensions */
13
#define TCG_TARGET_HAS_qemu_st8_i32 1
17
#include "decode-XVentanaCondOps.c.inc"
14
#endif
18
15
19
+/* The specification allows for longer insns, but not supported by qemu. */
16
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
20
+#define MAX_INSN_LEN 4
17
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
21
+
18
+ (TCG_TARGET_REG_BITS == 64 && have_atomic16)
22
+static inline int insn_len(uint16_t first_word)
19
20
/* We do not support older SSE systems, only beginning with AVX1. */
21
#define TCG_TARGET_HAS_v64 have_avx1
22
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
23
index XXXXXXX..XXXXXXX 100644
24
--- a/tcg/i386/tcg-target.c.inc
25
+++ b/tcg/i386/tcg-target.c.inc
26
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
27
#endif
28
};
29
30
+#define TCG_TMP_VEC TCG_REG_XMM5
31
+
32
static const int tcg_target_call_iarg_regs[] = {
33
#if TCG_TARGET_REG_BITS == 64
34
#if defined(_WIN64)
35
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
36
#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16)
37
#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16)
38
#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16)
39
+#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16)
40
+#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16)
41
#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
42
#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
43
#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)
44
@@ -XXX,XX +XXX,XX @@ typedef struct {
45
46
bool tcg_target_has_memory_bswap(MemOp memop)
47
{
48
- return have_movbe;
49
+ TCGAtomAlign aa;
50
+
51
+ if (!have_movbe) {
52
+ return false;
53
+ }
54
+ if ((memop & MO_SIZE) <= MO_64) {
55
+ return true;
56
+ }
57
+
58
+ /*
59
+ * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA,
60
+ * but do allow a pair of 64-bit operations, i.e. MOVBEQ.
61
+ */
62
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
63
+ return aa.atom <= MO_64;
64
}
65
66
/*
67
@@ -XXX,XX +XXX,XX @@ static const TCGLdstHelperParam ldst_helper_param = {
68
static const TCGLdstHelperParam ldst_helper_param = { };
69
#endif
70
71
+static void tcg_out_vec_to_pair(TCGContext *s, TCGType type,
72
+ TCGReg l, TCGReg h, TCGReg v)
23
+{
73
+{
24
+ return (first_word & 3) == 3 ? 4 : 2;
74
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
75
+
76
+ /* vpmov{d,q} %v, %l */
77
+ tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l);
78
+ /* vpextr{d,q} $1, %v, %h */
79
+ tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h);
80
+ tcg_out8(s, 1);
25
+}
81
+}
26
+
82
+
27
static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
83
+static void tcg_out_pair_to_vec(TCGContext *s, TCGType type,
84
+ TCGReg v, TCGReg l, TCGReg h)
85
+{
86
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
87
+
88
+ /* vmov{d,q} %l, %v */
89
+ tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l);
90
+ /* vpinsr{d,q} $1, %h, %v, %v */
91
+ tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h);
92
+ tcg_out8(s, 1);
93
+}
94
+
95
/*
96
* Generate code for the slow path for a load at the end of block
97
*/
98
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
28
{
99
{
29
/*
100
TCGLabelQemuLdst *ldst = NULL;
30
@@ -XXX,XX +XXX,XX @@ static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
101
MemOp opc = get_memop(oi);
31
};
102
+ MemOp s_bits = opc & MO_SIZE;
32
103
unsigned a_mask;
33
/* Check for compressed insn */
104
34
- if (extract16(opcode, 0, 2) != 3) {
105
#ifdef CONFIG_SOFTMMU
35
+ if (insn_len(opcode) == 2) {
106
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
36
if (!has_ext(ctx, RVC)) {
107
*h = x86_guest_base;
37
gen_exception_illegal(ctx);
108
#endif
38
} else {
109
h->base = addrlo;
110
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
111
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
112
a_mask = (1 << h->aa.align) - 1;
113
114
#ifdef CONFIG_SOFTMMU
115
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
116
TCGType tlbtype = TCG_TYPE_I32;
117
int trexw = 0, hrexw = 0, tlbrexw = 0;
118
unsigned mem_index = get_mmuidx(oi);
119
- unsigned s_bits = opc & MO_SIZE;
120
unsigned s_mask = (1 << s_bits) - 1;
121
target_ulong tlb_mask;
122
123
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
124
h.base, h.index, 0, h.ofs + 4);
125
}
126
break;
127
+
128
+ case MO_128:
129
+ {
130
+ TCGLabel *l1 = NULL, *l2 = NULL;
131
+ bool use_pair = h.aa.atom < MO_128;
132
+
133
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
134
+
135
+ if (!use_pair) {
136
+ tcg_debug_assert(!use_movbe);
137
+ /*
138
+ * Atomicity requires that we use VMOVDQA.
139
+ * If we've already checked for 16-byte alignment, that's all
140
+ * we need. If we arrive here with lesser alignment, then we
141
+ * have determined that less than 16-byte alignment can be
142
+ * satisfied with two 8-byte loads.
143
+ */
144
+ if (h.aa.align < MO_128) {
145
+ use_pair = true;
146
+ l1 = gen_new_label();
147
+ l2 = gen_new_label();
148
+
149
+ tcg_out_testi(s, h.base, 15);
150
+ tcg_out_jxx(s, JCC_JNE, l2, true);
151
+ }
152
+
153
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
154
+ TCG_TMP_VEC, 0,
155
+ h.base, h.index, 0, h.ofs);
156
+ tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo,
157
+ datahi, TCG_TMP_VEC);
158
+
159
+ if (use_pair) {
160
+ tcg_out_jxx(s, JCC_JMP, l1, true);
161
+ tcg_out_label(s, l2);
162
+ }
163
+ }
164
+ if (use_pair) {
165
+ if (use_movbe) {
166
+ TCGReg t = datalo;
167
+ datalo = datahi;
168
+ datahi = t;
169
+ }
170
+ if (h.base == datalo || h.index == datalo) {
171
+ tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi,
172
+ h.base, h.index, 0, h.ofs);
173
+ tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
174
+ datalo, datahi, 0);
175
+ tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
176
+ datahi, datahi, 8);
177
+ } else {
178
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
179
+ h.base, h.index, 0, h.ofs);
180
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
181
+ h.base, h.index, 0, h.ofs + 8);
182
+ }
183
+ }
184
+ if (l1) {
185
+ tcg_out_label(s, l1);
186
+ }
187
+ }
188
+ break;
189
+
190
default:
191
g_assert_not_reached();
192
}
193
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
194
h.base, h.index, 0, h.ofs + 4);
195
}
196
break;
197
+
198
+ case MO_128:
199
+ {
200
+ TCGLabel *l1 = NULL, *l2 = NULL;
201
+ bool use_pair = h.aa.atom < MO_128;
202
+
203
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
204
+
205
+ if (!use_pair) {
206
+ tcg_debug_assert(!use_movbe);
207
+ /*
208
+ * Atomicity requires that we use VMOVDQA.
209
+ * If we've already checked for 16-byte alignment, that's all
210
+ * we need. If we arrive here with lesser alignment, then we
211
+ * have determined that less than 16-byte alignment can be
212
+ * satisfied with two 8-byte loads.
213
+ */
214
+ if (h.aa.align < MO_128) {
215
+ use_pair = true;
216
+ l1 = gen_new_label();
217
+ l2 = gen_new_label();
218
+
219
+ tcg_out_testi(s, h.base, 15);
220
+ tcg_out_jxx(s, JCC_JNE, l2, true);
221
+ }
222
+
223
+ tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC,
224
+ datalo, datahi);
225
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
226
+ TCG_TMP_VEC, 0,
227
+ h.base, h.index, 0, h.ofs);
228
+
229
+ if (use_pair) {
230
+ tcg_out_jxx(s, JCC_JMP, l1, true);
231
+ tcg_out_label(s, l2);
232
+ }
233
+ }
234
+ if (use_pair) {
235
+ if (use_movbe) {
236
+ TCGReg t = datalo;
237
+ datalo = datahi;
238
+ datahi = t;
239
+ }
240
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
241
+ h.base, h.index, 0, h.ofs);
242
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
243
+ h.base, h.index, 0, h.ofs + 8);
244
+ }
245
+ if (l1) {
246
+ tcg_out_label(s, l1);
247
+ }
248
+ }
249
+ break;
250
+
251
default:
252
g_assert_not_reached();
253
}
254
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
255
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
256
}
257
break;
258
+ case INDEX_op_qemu_ld_i128:
259
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
260
+ tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
261
+ break;
262
case INDEX_op_qemu_st_i32:
263
case INDEX_op_qemu_st8_i32:
264
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
265
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
266
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
267
}
268
break;
269
+ case INDEX_op_qemu_st_i128:
270
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
271
+ tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
272
+ break;
273
274
OP_32_64(mulu2):
275
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
276
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
277
: TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(L, L, L)
278
: C_O0_I4(L, L, L, L));
279
280
+ case INDEX_op_qemu_ld_i128:
281
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
282
+ return C_O2_I1(r, r, L);
283
+ case INDEX_op_qemu_st_i128:
284
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
285
+ return C_O0_I3(L, L, L);
286
+
287
case INDEX_op_brcond2_i32:
288
return C_O0_I4(r, r, ri, ri);
289
290
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
291
292
s->reserved_regs = 0;
293
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
294
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC);
295
#ifdef _WIN64
296
/* These are call saved, and we don't save them, so don't use them. */
297
tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6);
39
--
298
--
40
2.34.1
299
2.34.1
diff view generated by jsdifflib
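The MO_128 load path in the hunk above is easier to follow as a sketch of the code it emits when 16-byte atomicity is required but 16-byte alignment could not be proven at translation time (illustrative labels; the store side mirrors it, building the vector from the register pair first):

    /*
     *         test    addr, 15
     *         jne     .Lpair              ; not 16-byte aligned at run time
     *         vmovdqa (addr), %xmm5       ; one 16-byte access via TCG_TMP_VEC
     *         vmovq   %xmm5, datalo       ; split into the two result registers
     *         vpextrq $1, %xmm5, datahi
     *         jmp     .Ldone
     * .Lpair:
     *         movq    (addr), datalo      ; two 8-byte accesses suffice here
     *         movq    8(addr), datahi
     * .Ldone:
     */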
1
We're about to start validating PAGE_EXEC, which means
1
We will need to allocate a second general-purpose temporary.
2
that we've got to put this code into a section that is
2
Rename the existing temps to add a distinguishing number.
3
both writable and executable.
4
3
5
Note that this test did not run on hardware beforehand either.
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
7
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
8
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
6
---
11
tests/tcg/i386/test-i386.c | 2 +-
7
tcg/aarch64/tcg-target.c.inc | 50 ++++++++++++++++++------------------
12
1 file changed, 1 insertion(+), 1 deletion(-)
8
1 file changed, 25 insertions(+), 25 deletions(-)
13
9
14
diff --git a/tests/tcg/i386/test-i386.c b/tests/tcg/i386/test-i386.c
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/tests/tcg/i386/test-i386.c
12
--- a/tcg/aarch64/tcg-target.c.inc
17
+++ b/tests/tcg/i386/test-i386.c
13
+++ b/tcg/aarch64/tcg-target.c.inc
18
@@ -XXX,XX +XXX,XX @@ uint8_t code[] = {
14
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
19
0xc3, /* ret */
15
bool have_lse;
16
bool have_lse2;
17
18
-#define TCG_REG_TMP TCG_REG_X30
19
-#define TCG_VEC_TMP TCG_REG_V31
20
+#define TCG_REG_TMP0 TCG_REG_X30
21
+#define TCG_VEC_TMP0 TCG_REG_V31
22
23
#ifndef CONFIG_SOFTMMU
24
/* Note that XZR cannot be encoded in the address base register slot,
25
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
26
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
27
TCGReg r, TCGReg base, intptr_t offset)
28
{
29
- TCGReg temp = TCG_REG_TMP;
30
+ TCGReg temp = TCG_REG_TMP0;
31
32
if (offset < -0xffffff || offset > 0xffffff) {
33
tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
35
}
36
37
/* Worst-case scenario, move offset to temp register, use reg offset. */
38
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
39
- tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
40
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
41
+ tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
42
}
43
44
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
45
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
46
if (offset == sextract64(offset, 0, 26)) {
47
tcg_out_insn(s, 3206, BL, offset);
48
} else {
49
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
50
- tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
51
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
52
+ tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
53
}
54
}
55
56
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
57
AArch64Insn insn;
58
59
if (rl == ah || (!const_bh && rl == bh)) {
60
- rl = TCG_REG_TMP;
61
+ rl = TCG_REG_TMP0;
62
}
63
64
if (const_bl) {
65
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
66
possibility of adding 0+const in the low part, and the
67
immediate add instructions encode XSP not XZR. Don't try
68
anything more elaborate here than loading another zero. */
69
- al = TCG_REG_TMP;
70
+ al = TCG_REG_TMP0;
71
tcg_out_movi(s, ext, al, 0);
72
}
73
tcg_out_insn_3401(s, insn, ext, rl, al, bl);
74
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
75
{
76
TCGReg a1 = a0;
77
if (is_ctz) {
78
- a1 = TCG_REG_TMP;
79
+ a1 = TCG_REG_TMP0;
80
tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
81
}
82
if (const_b && b == (ext ? 64 : 32)) {
83
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
84
AArch64Insn sel = I3506_CSEL;
85
86
tcg_out_cmp(s, ext, a0, 0, 1);
87
- tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
88
+ tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
89
90
if (const_b) {
91
if (b == -1) {
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
93
b = d;
94
}
95
}
96
- tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
97
+ tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
98
}
99
}
100
101
@@ -XXX,XX +XXX,XX @@ bool tcg_target_has_memory_bswap(MemOp memop)
102
}
103
104
static const TCGLdstHelperParam ldst_helper_param = {
105
- .ntmp = 1, .tmp = { TCG_REG_TMP }
106
+ .ntmp = 1, .tmp = { TCG_REG_TMP0 }
20
};
107
};
21
108
22
-asm(".section \".data\"\n"
109
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
23
+asm(".section \".data_x\",\"awx\"\n"
110
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
24
"smc_code2:\n"
111
25
"movl 4(%esp), %eax\n"
112
set_jmp_insn_offset(s, which);
26
"movl %eax, smc_patch_addr2 + 1\n"
113
tcg_out32(s, I3206_B);
114
- tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
115
+ tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
116
set_jmp_reset_offset(s, which);
117
}
118
119
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
120
ptrdiff_t i_offset = i_addr - jmp_rx;
121
122
/* Note that we asserted this in range in tcg_out_goto_tb. */
123
- insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
124
+ insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
125
}
126
qatomic_set((uint32_t *)jmp_rw, insn);
127
flush_idcache_range(jmp_rx, jmp_rw, 4);
128
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
129
130
case INDEX_op_rem_i64:
131
case INDEX_op_rem_i32:
132
- tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
133
- tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
134
+ tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
135
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
136
break;
137
case INDEX_op_remu_i64:
138
case INDEX_op_remu_i32:
139
- tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
140
- tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
141
+ tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
142
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
143
break;
144
145
case INDEX_op_shl_i64:
146
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
147
if (c2) {
148
tcg_out_rotl(s, ext, a0, a1, a2);
149
} else {
150
- tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
151
- tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
152
+ tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
153
+ tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
154
}
155
break;
156
157
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
158
break;
159
}
160
}
161
- tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
162
- a2 = TCG_VEC_TMP;
163
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
164
+ a2 = TCG_VEC_TMP0;
165
}
166
if (is_scalar) {
167
insn = cmp_scalar_insn[cond];
168
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
169
s->reserved_regs = 0;
170
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
171
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
172
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
173
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
174
- tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
175
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
176
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
177
}
178
179
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
27
--
180
--
28
2.34.1
181
2.34.1
diff view generated by jsdifflib
1
We're about to start validating PAGE_EXEC, which means
1
Use LDXP+STXP when LSE2 is not present and 16-byte atomicity is required,
2
that we've got to mark the commpage executable. We had
2
and LDP/STP otherwise. This requires allocating a second general-purpose
3
been placing the commpage outside of reserved_va, which
3
temporary, as Rs cannot overlap Rn in STXP.
4
was incorrect and led to an abort.
5
4
6
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
7
---
10
linux-user/arm/target_cpu.h | 4 ++--
8
tcg/aarch64/tcg-target-con-set.h | 2 +
11
linux-user/elfload.c | 6 +++++-
9
tcg/aarch64/tcg-target.h | 11 +-
12
2 files changed, 7 insertions(+), 3 deletions(-)
10
tcg/aarch64/tcg-target.c.inc | 179 ++++++++++++++++++++++++++++++-
11
3 files changed, 189 insertions(+), 3 deletions(-)
13
12
14
diff --git a/linux-user/arm/target_cpu.h b/linux-user/arm/target_cpu.h
13
diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h
15
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
16
--- a/linux-user/arm/target_cpu.h
15
--- a/tcg/aarch64/tcg-target-con-set.h
17
+++ b/linux-user/arm/target_cpu.h
16
+++ b/tcg/aarch64/tcg-target-con-set.h
18
@@ -XXX,XX +XXX,XX @@ static inline unsigned long arm_max_reserved_va(CPUState *cs)
17
@@ -XXX,XX +XXX,XX @@ C_O0_I2(lZ, l)
19
} else {
18
C_O0_I2(r, rA)
20
/*
19
C_O0_I2(rZ, r)
21
* We need to be able to map the commpage.
20
C_O0_I2(w, r)
22
- * See validate_guest_space in linux-user/elfload.c.
21
+C_O0_I3(lZ, lZ, l)
23
+ * See init_guest_commpage in linux-user/elfload.c.
22
C_O1_I1(r, l)
24
*/
23
C_O1_I1(r, r)
25
- return 0xffff0000ul;
24
C_O1_I1(w, r)
26
+ return 0xfffffffful;
25
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wO)
26
C_O1_I2(w, w, wZ)
27
C_O1_I3(w, w, w, w)
28
C_O1_I4(r, r, rA, rZ, rZ)
29
+C_O2_I1(r, r, l)
30
C_O2_I4(r, r, rZ, rZ, rA, rMZ)
31
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tcg/aarch64/tcg-target.h
34
+++ b/tcg/aarch64/tcg-target.h
35
@@ -XXX,XX +XXX,XX @@ extern bool have_lse2;
36
#define TCG_TARGET_HAS_muluh_i64 1
37
#define TCG_TARGET_HAS_mulsh_i64 1
38
39
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
40
+/*
41
+ * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
42
+ * which requires writable pages. We must defer to the helper for user-only,
43
+ * but in system mode all ram is writable for the host.
44
+ */
45
+#ifdef CONFIG_USER_ONLY
46
+#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2
47
+#else
48
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
49
+#endif
50
51
#define TCG_TARGET_HAS_v64 1
52
#define TCG_TARGET_HAS_v128 1
53
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
54
index XXXXXXX..XXXXXXX 100644
55
--- a/tcg/aarch64/tcg-target.c.inc
56
+++ b/tcg/aarch64/tcg-target.c.inc
57
@@ -XXX,XX +XXX,XX @@ bool have_lse;
58
bool have_lse2;
59
60
#define TCG_REG_TMP0 TCG_REG_X30
61
+#define TCG_REG_TMP1 TCG_REG_X17
62
#define TCG_VEC_TMP0 TCG_REG_V31
63
64
#ifndef CONFIG_SOFTMMU
65
@@ -XXX,XX +XXX,XX @@ typedef enum {
66
I3305_LDR_v64 = 0x5c000000,
67
I3305_LDR_v128 = 0x9c000000,
68
69
+ /* Load/store exclusive. */
70
+ I3306_LDXP = 0xc8600000,
71
+ I3306_STXP = 0xc8200000,
72
+
73
/* Load/store register. Described here as 3.3.12, but the helper
74
that emits them can transform to 3.3.10 or 3.3.13. */
75
I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
76
@@ -XXX,XX +XXX,XX @@ typedef enum {
77
I3406_ADR = 0x10000000,
78
I3406_ADRP = 0x90000000,
79
80
+ /* Add/subtract extended register instructions. */
81
+ I3501_ADD = 0x0b200000,
82
+
83
/* Add/subtract shifted register instructions (without a shift). */
84
I3502_ADD = 0x0b000000,
85
I3502_ADDS = 0x2b000000,
86
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
87
tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
88
}
89
90
+static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
91
+ TCGReg rt, TCGReg rt2, TCGReg rn)
92
+{
93
+ tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
94
+}
95
+
96
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
97
TCGReg rt, int imm19)
98
{
99
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
100
tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
101
}
102
103
+static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
104
+ TCGType sf, TCGReg rd, TCGReg rn,
105
+ TCGReg rm, int opt, int imm3)
106
+{
107
+ tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
108
+ imm3 << 10 | rn << 5 | rd);
109
+}
110
+
111
/* This function is for both 3.5.2 (Add/Subtract shifted register), for
112
the rare occasion when we actually want to supply a shift amount. */
113
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
114
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
115
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
116
TCGLabelQemuLdst *ldst = NULL;
117
MemOp opc = get_memop(oi);
118
+ MemOp s_bits = opc & MO_SIZE;
119
unsigned a_mask;
120
121
h->aa = atom_and_align_for_opc(s, opc,
122
have_lse2 ? MO_ATOM_WITHIN16
123
: MO_ATOM_IFALIGN,
124
- false);
125
+ s_bits == MO_128);
126
a_mask = (1 << h->aa.align) - 1;
127
128
#ifdef CONFIG_SOFTMMU
129
- unsigned s_bits = opc & MO_SIZE;
130
unsigned s_mask = (1u << s_bits) - 1;
131
unsigned mem_index = get_mmuidx(oi);
132
TCGReg x3;
133
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
27
}
134
}
28
}
135
}
29
#define MAX_RESERVED_VA arm_max_reserved_va
136
30
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
137
+static TCGLabelQemuLdst *
31
index XXXXXXX..XXXXXXX 100644
138
+prepare_host_addr_base_only(TCGContext *s, HostAddress *h, TCGReg addr_reg,
32
--- a/linux-user/elfload.c
139
+ MemOpIdx oi, bool is_ld)
33
+++ b/linux-user/elfload.c
140
+{
34
@@ -XXX,XX +XXX,XX @@ enum {
141
+ TCGLabelQemuLdst *ldst;
35
142
+
36
static bool init_guest_commpage(void)
143
+ ldst = prepare_host_addr(s, h, addr_reg, oi, true);
37
{
144
+
38
- void *want = g2h_untagged(HI_COMMPAGE & -qemu_host_page_size);
145
+ /* Compose the final address, as LDP/STP have no indexing. */
39
+ abi_ptr commpage = HI_COMMPAGE & -qemu_host_page_size;
146
+ if (h->index != TCG_REG_XZR) {
40
+ void *want = g2h_untagged(commpage);
147
+ tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, TCG_REG_TMP0,
41
void *addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE,
148
+ h->base, h->index,
42
MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
149
+ h->index_ext == TCG_TYPE_I32 ? MO_32 : MO_64, 0);
43
150
+ h->base = TCG_REG_TMP0;
44
@@ -XXX,XX +XXX,XX @@ static bool init_guest_commpage(void)
151
+ h->index = TCG_REG_XZR;
45
perror("Protecting guest commpage");
152
+ h->index_ext = TCG_TYPE_I64;
46
exit(EXIT_FAILURE);
153
+ }
47
}
154
+
48
+
155
+ return ldst;
49
+ page_set_flags(commpage, commpage + qemu_host_page_size,
156
+}
50
+ PAGE_READ | PAGE_EXEC | PAGE_VALID);
157
+
51
return true;
158
+static void tcg_out_qemu_ld128(TCGContext *s, TCGReg datalo, TCGReg datahi,
159
+ TCGReg addr_reg, MemOpIdx oi)
160
+{
161
+ TCGLabelQemuLdst *ldst;
162
+ HostAddress h;
163
+
164
+ ldst = prepare_host_addr_base_only(s, &h, addr_reg, oi, true);
165
+
166
+ if (h.aa.atom < MO_128 || have_lse2) {
167
+ tcg_out_insn(s, 3314, LDP, datalo, datahi, h.base, 0, 0, 0);
168
+ } else {
169
+ TCGLabel *l0, *l1 = NULL;
170
+
171
+ /*
172
+ * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
173
+ * 1: ldxp lo,hi,[addr]
174
+ * stxp tmp1,lo,hi,[addr]
175
+ * cbnz tmp1, 1b
176
+ *
177
+ * If we have already checked for 16-byte alignment, that's all
178
+ * we need. Otherwise we have determined that misaligned atomicity
179
+ * may be handled with two 8-byte loads.
180
+ */
181
+ if (h.aa.align < MO_128) {
182
+ /*
183
+ * TODO: align should be MO_64, so we only need test bit 3,
184
+ * which means we could use TBNZ instead of AND+CBNZ.
185
+ */
186
+ l1 = gen_new_label();
187
+ tcg_out_logicali(s, I3404_ANDI, 0, TCG_REG_TMP1, addr_reg, 15);
188
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE,
189
+ TCG_REG_TMP1, 0, 1, l1);
190
+ }
191
+
192
+ l0 = gen_new_label();
193
+ tcg_out_label(s, l0);
194
+
195
+ tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, datalo, datahi, h.base);
196
+ tcg_out_insn(s, 3306, STXP, TCG_REG_TMP1, datalo, datahi, h.base);
197
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE, TCG_REG_TMP1, 0, 1, l0);
198
+
199
+ if (l1) {
200
+ TCGLabel *l2 = gen_new_label();
201
+ tcg_out_goto_label(s, l2);
202
+
203
+ tcg_out_label(s, l1);
204
+ tcg_out_insn(s, 3314, LDP, datalo, datahi, h.base, 0, 0, 0);
205
+
206
+ tcg_out_label(s, l2);
207
+ }
208
+ }
209
+
210
+ if (ldst) {
211
+ ldst->type = TCG_TYPE_I128;
212
+ ldst->datalo_reg = datalo;
213
+ ldst->datahi_reg = datahi;
214
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
215
+ }
216
+}
217
+
218
+static void tcg_out_qemu_st128(TCGContext *s, TCGReg datalo, TCGReg datahi,
219
+ TCGReg addr_reg, MemOpIdx oi)
220
+{
221
+ TCGLabelQemuLdst *ldst;
222
+ HostAddress h;
223
+
224
+ ldst = prepare_host_addr_base_only(s, &h, addr_reg, oi, false);
225
+
226
+ if (h.aa.atom < MO_128 || have_lse2) {
227
+ tcg_out_insn(s, 3314, STP, datalo, datahi, h.base, 0, 0, 0);
228
+ } else {
229
+ TCGLabel *l0, *l1 = NULL;
230
+
231
+ /*
232
+ * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
233
+ * 1: ldxp xzr,tmp1,[addr]
234
+ * stxp tmp1,lo,hi,[addr]
235
+ * cbnz tmp1, 1b
236
+ *
237
+ * If we have already checked for 16-byte alignment, that's all
238
+ * we need. Otherwise we have determined that misaligned atomicity
239
+ * may be handled with two 8-byte stores.
240
+ */
241
+ if (h.aa.align < MO_128) {
242
+ /*
243
+ * TODO: align should be MO_64, so we only need test bit 3,
244
+ * which means we could use TBNZ instead of AND+CBNZ.
245
+ */
246
+ l1 = gen_new_label();
247
+ tcg_out_logicali(s, I3404_ANDI, 0, TCG_REG_TMP1, addr_reg, 15);
248
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE,
249
+ TCG_REG_TMP1, 0, 1, l1);
250
+ }
251
+
252
+ l0 = gen_new_label();
253
+ tcg_out_label(s, l0);
254
+
255
+ tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR,
256
+ TCG_REG_XZR, TCG_REG_TMP1, h.base);
257
+ tcg_out_insn(s, 3306, STXP, TCG_REG_TMP1, datalo, datahi, h.base);
258
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE, TCG_REG_TMP1, 0, 1, l0);
259
+
260
+ if (l1) {
261
+ TCGLabel *l2 = gen_new_label();
262
+ tcg_out_goto_label(s, l2);
263
+
264
+ tcg_out_label(s, l1);
265
+ tcg_out_insn(s, 3314, STP, datalo, datahi, h.base, 0, 0, 0);
266
+
267
+ tcg_out_label(s, l2);
268
+ }
269
+ }
270
+
271
+ if (ldst) {
272
+ ldst->type = TCG_TYPE_I128;
273
+ ldst->datalo_reg = datalo;
274
+ ldst->datahi_reg = datahi;
275
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
276
+ }
277
+}
278
+
279
static const tcg_insn_unit *tb_ret_addr;
280
281
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
282
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
283
case INDEX_op_qemu_st_i64:
284
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
285
break;
286
+ case INDEX_op_qemu_ld_i128:
287
+ tcg_out_qemu_ld128(s, a0, a1, a2, args[3]);
288
+ break;
289
+ case INDEX_op_qemu_st_i128:
290
+ tcg_out_qemu_st128(s, REG0(0), REG0(1), a2, args[3]);
291
+ break;
292
293
case INDEX_op_bswap64_i64:
294
tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
295
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
296
case INDEX_op_qemu_ld_i32:
297
case INDEX_op_qemu_ld_i64:
298
return C_O1_I1(r, l);
299
+ case INDEX_op_qemu_ld_i128:
300
+ return C_O2_I1(r, r, l);
301
case INDEX_op_qemu_st_i32:
302
case INDEX_op_qemu_st_i64:
303
return C_O0_I2(lZ, l);
304
+ case INDEX_op_qemu_st_i128:
305
+ return C_O0_I3(lZ, lZ, l);
306
307
case INDEX_op_deposit_i32:
308
case INDEX_op_deposit_i64:
309
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
310
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
311
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
312
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
313
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
314
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
}

--
2.34.1
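As an aside on the LDXP/STXP sequence described in the comments of the aarch64 patch above: a minimal host-side sketch, not part of the patch, assuming GCC/Clang extended asm on AArch64 and a 16-byte-aligned address, of a 16-byte atomic read without FEAT_LSE2.

    #include <stdint.h>

    typedef struct { uint64_t lo, hi; } U128Pair;

    /* Retry until the exclusive store succeeds; the value returned by
     * LDXP is then known to have been read as a single 16-byte copy.
     * addr must be 16-byte aligned. */
    static inline U128Pair atomic16_read_ldxp(const void *addr)
    {
        U128Pair val;
        uint32_t fail;

        do {
            asm volatile("ldxp %0, %1, [%3]\n\t"
                         "stxp %w2, %0, %1, [%3]"
                         : "=&r"(val.lo), "=&r"(val.hi), "=&r"(fail)
                         : "r"(addr)
                         : "memory");
        } while (fail);
        return val;
    }

The generated TCG code follows the same shape, with the misaligned case falling back to a plain LDP as the patch explains.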
Use LQ/STQ with ISA v2.07, and 16-byte atomicity is required.
Note that these instructions do not require 16-byte alignment.

Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target-con-set.h |   2 +
 tcg/ppc/tcg-target-con-str.h |   1 +
 tcg/ppc/tcg-target.h         |   3 +-
 tcg/ppc/tcg-target.c.inc     | 115 +++++++++++++++++++++++++++++++----
 4 files changed, 108 insertions(+), 13 deletions(-)

Right now the translator stops right *after* the end of a page, which
breaks reporting of fault locations when the last instruction of a
multi-insn translation block crosses a page boundary.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1155
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/riscv/translate.c          | 17 +++++--
 tests/tcg/riscv64/noexec.c        | 79 +++++++++++++++++++++++++++++++
 tests/tcg/riscv64/Makefile.target |  1 +
 3 files changed, 93 insertions(+), 4 deletions(-)
 create mode 100644 tests/tcg/riscv64/noexec.c

diff --git a/target/riscv/translate.c b/target/riscv/translate.c
13
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
18
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
19
--- a/target/riscv/translate.c
15
--- a/tcg/ppc/tcg-target-con-set.h
20
+++ b/target/riscv/translate.c
16
+++ b/tcg/ppc/tcg-target-con-set.h
21
@@ -XXX,XX +XXX,XX @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
17
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r)
22
}
18
C_O0_I2(r, ri)
23
ctx->nftemp = 0;
19
C_O0_I2(v, r)
24
20
C_O0_I3(r, r, r)
25
+ /* Only the first insn within a TB is allowed to cross a page boundary. */
21
+C_O0_I3(o, m, r)
26
if (ctx->base.is_jmp == DISAS_NEXT) {
22
C_O0_I4(r, r, ri, ri)
27
- target_ulong page_start;
23
C_O0_I4(r, r, r, r)
24
C_O1_I1(r, r)
25
@@ -XXX,XX +XXX,XX @@ C_O1_I3(v, v, v, v)
26
C_O1_I4(r, r, ri, rZ, rZ)
27
C_O1_I4(r, r, r, ri, ri)
28
C_O2_I1(r, r, r)
29
+C_O2_I1(o, m, r)
30
C_O2_I2(r, r, r, r)
31
C_O2_I4(r, r, rI, rZM, r, r)
32
C_O2_I4(r, r, r, r, rI, rZM)
33
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/tcg/ppc/tcg-target-con-str.h
36
+++ b/tcg/ppc/tcg-target-con-str.h
37
@@ -XXX,XX +XXX,XX @@
38
* REGS(letter, register_mask)
39
*/
40
REGS('r', ALL_GENERAL_REGS)
41
+REGS('o', ALL_GENERAL_REGS & 0xAAAAAAAAu) /* odd registers */
42
REGS('v', ALL_VECTOR_REGS)
43
44
/*
45
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/ppc/tcg-target.h
48
+++ b/tcg/ppc/tcg-target.h
49
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
50
#define TCG_TARGET_HAS_mulsh_i64 1
51
#endif
52
53
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
54
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
55
+ (TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
56
57
/*
58
* While technically Altivec could support V64, it has no 64-bit store
59
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
60
index XXXXXXX..XXXXXXX 100644
61
--- a/tcg/ppc/tcg-target.c.inc
62
+++ b/tcg/ppc/tcg-target.c.inc
63
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
64
65
#define B OPCD( 18)
66
#define BC OPCD( 16)
67
+
68
#define LBZ OPCD( 34)
69
#define LHZ OPCD( 40)
70
#define LHA OPCD( 42)
71
#define LWZ OPCD( 32)
72
#define LWZUX XO31( 55)
73
-#define STB OPCD( 38)
74
-#define STH OPCD( 44)
75
-#define STW OPCD( 36)
28
-
76
-
29
- page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
77
-#define STD XO62( 0)
30
- if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE) {
78
-#define STDU XO62( 1)
31
+ if (!is_same_page(&ctx->base, ctx->base.pc_next)) {
79
-#define STDX XO31(149)
32
ctx->base.is_jmp = DISAS_TOO_MANY;
80
-
33
+ } else {
81
#define LD XO58( 0)
34
+ unsigned page_ofs = ctx->base.pc_next & ~TARGET_PAGE_MASK;
82
#define LDX XO31( 21)
35
+
83
#define LDU XO58( 1)
36
+ if (page_ofs > TARGET_PAGE_SIZE - MAX_INSN_LEN) {
84
#define LDUX XO31( 53)
37
+ uint16_t next_insn = cpu_lduw_code(env, ctx->base.pc_next);
85
#define LWA XO58( 2)
38
+ int len = insn_len(next_insn);
86
#define LWAX XO31(341)
39
+
87
+#define LQ OPCD( 56)
40
+ if (!is_same_page(&ctx->base, ctx->base.pc_next + len)) {
88
+
41
+ ctx->base.is_jmp = DISAS_TOO_MANY;
89
+#define STB OPCD( 38)
42
+ }
90
+#define STH OPCD( 44)
43
+ }
91
+#define STW OPCD( 36)
44
}
92
+#define STD XO62( 0)
93
+#define STDU XO62( 1)
94
+#define STDX XO31(149)
95
+#define STQ XO62( 2)
96
97
#define ADDIC OPCD( 12)
98
#define ADDI OPCD( 14)
99
@@ -XXX,XX +XXX,XX @@ typedef struct {
100
101
bool tcg_target_has_memory_bswap(MemOp memop)
102
{
103
- return true;
104
+ TCGAtomAlign aa;
105
+
106
+ if ((memop & MO_SIZE) <= MO_64) {
107
+ return true;
108
+ }
109
+
110
+ /*
111
+ * Reject 16-byte memop with 16-byte atomicity,
112
+ * but do allow a pair of 64-bit operations.
113
+ */
114
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
115
+ return aa.atom <= MO_64;
116
}
117
118
/*
119
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
120
{
121
TCGLabelQemuLdst *ldst = NULL;
122
MemOp opc = get_memop(oi);
123
- MemOp a_bits;
124
+ MemOp a_bits, s_bits;
125
126
/*
127
* Book II, Section 1.4, Single-Copy Atomicity, specifies:
128
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
129
* As of 3.0, "the non-atomic access is performed as described in
130
* the corresponding list", which matches MO_ATOM_SUBALIGN.
131
*/
132
+ s_bits = opc & MO_SIZE;
133
h->aa = atom_and_align_for_opc(s, opc,
134
have_isa_3_00 ? MO_ATOM_SUBALIGN
135
: MO_ATOM_IFALIGN,
136
- false);
137
+ s_bits == MO_128);
138
a_bits = h->aa.align;
139
140
#ifdef CONFIG_SOFTMMU
141
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
142
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
143
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
144
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
145
- unsigned s_bits = opc & MO_SIZE;
146
147
ldst = new_ldst_label(s);
148
ldst->is_ld = is_ld;
149
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
45
}
150
}
46
}
151
}
47
diff --git a/tests/tcg/riscv64/noexec.c b/tests/tcg/riscv64/noexec.c
152
48
new file mode 100644
153
+static TCGLabelQemuLdst *
49
index XXXXXXX..XXXXXXX
154
+prepare_host_addr_index_only(TCGContext *s, HostAddress *h, TCGReg addr_reg,
50
--- /dev/null
155
+ MemOpIdx oi, bool is_ld)
51
+++ b/tests/tcg/riscv64/noexec.c
52
@@ -XXX,XX +XXX,XX @@
53
+#include "../multiarch/noexec.c.inc"
54
+
55
+static void *arch_mcontext_pc(const mcontext_t *ctx)
56
+{
156
+{
57
+ return (void *)ctx->__gregs[REG_PC];
157
+ TCGLabelQemuLdst *ldst;
158
+
159
+ ldst = prepare_host_addr(s, h, addr_reg, -1, oi, true);
160
+
161
+ /* Compose the final address, as LQ/STQ have no indexing. */
162
+ if (h->base != 0) {
163
+ tcg_out32(s, ADD | TAB(TCG_REG_TMP1, h->base, h->index));
164
+ h->index = TCG_REG_TMP1;
165
+ h->base = 0;
166
+ }
167
+
168
+ return ldst;
58
+}
169
+}
59
+
170
+
60
+static int arch_mcontext_arg(const mcontext_t *ctx)
171
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
172
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
61
+{
173
+{
62
+ return ctx->__gregs[REG_A0];
174
+ TCGLabelQemuLdst *ldst;
175
+ HostAddress h;
176
+ bool need_bswap;
177
+ uint32_t insn;
178
+
179
+ ldst = prepare_host_addr_index_only(s, &h, addr_reg, oi, is_ld);
180
+ need_bswap = get_memop(oi) & MO_BSWAP;
181
+
182
+ if (h.aa.atom == MO_128) {
183
+ tcg_debug_assert(!need_bswap);
184
+ tcg_debug_assert(datalo & 1);
185
+ tcg_debug_assert(datahi == datalo - 1);
186
+ insn = is_ld ? LQ : STQ;
187
+ tcg_out32(s, insn | TAI(datahi, h.index, 0));
188
+ } else {
189
+ TCGReg d1, d2;
190
+
191
+ if (HOST_BIG_ENDIAN ^ need_bswap) {
192
+ d1 = datahi, d2 = datalo;
193
+ } else {
194
+ d1 = datalo, d2 = datahi;
195
+ }
196
+
197
+ if (need_bswap) {
198
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
199
+ insn = is_ld ? LDBRX : STDBRX;
200
+ tcg_out32(s, insn | TAB(d1, 0, h.index));
201
+ tcg_out32(s, insn | TAB(d2, h.index, TCG_REG_R0));
202
+ } else {
203
+ insn = is_ld ? LD : STD;
204
+ tcg_out32(s, insn | TAI(d1, h.index, 0));
205
+ tcg_out32(s, insn | TAI(d2, h.index, 8));
206
+ }
207
+ }
208
+
209
+ if (ldst) {
210
+ ldst->type = TCG_TYPE_I128;
211
+ ldst->datalo_reg = datalo;
212
+ ldst->datahi_reg = datahi;
213
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
214
+ }
63
+}
215
+}
64
+
216
+
65
+static void arch_flush(void *p, int len)
217
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
66
+{
218
{
67
+ __builtin___clear_cache(p, p + len);
219
int i;
68
+}
220
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
69
+
221
args[4], TCG_TYPE_I64);
70
+extern char noexec_1[];
222
}
71
+extern char noexec_2[];
223
break;
72
+extern char noexec_end[];
224
+ case INDEX_op_qemu_ld_i128:
73
+
225
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
74
+asm(".option push\n"
226
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
75
+ ".option norvc\n"
227
+ break;
76
+ "noexec_1:\n"
228
+
77
+ " li a0,1\n" /* a0 is 0 on entry, set 1. */
229
case INDEX_op_qemu_st_i32:
78
+ "noexec_2:\n"
230
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
79
+ " li a0,2\n" /* a0 is 0/1; set 2. */
231
tcg_out_qemu_st(s, args[0], -1, args[1], -1,
80
+ " ret\n"
232
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
81
+ "noexec_end:\n"
233
args[4], TCG_TYPE_I64);
82
+ ".option pop");
234
}
83
+
235
break;
84
+int main(void)
236
+ case INDEX_op_qemu_st_i128:
85
+{
237
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
86
+ struct noexec_test noexec_tests[] = {
238
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
87
+ {
239
+ break;
88
+ .name = "fallthrough",
240
89
+ .test_code = noexec_1,
241
case INDEX_op_setcond_i32:
90
+ .test_len = noexec_end - noexec_1,
242
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
91
+ .page_ofs = noexec_1 - noexec_2,
243
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
92
+ .entry_ofs = noexec_1 - noexec_2,
244
: TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
93
+ .expected_si_ofs = 0,
245
: C_O0_I4(r, r, r, r));
94
+ .expected_pc_ofs = 0,
246
95
+ .expected_arg = 1,
247
+ case INDEX_op_qemu_ld_i128:
96
+ },
248
+ return C_O2_I1(o, m, r);
97
+ {
249
+ case INDEX_op_qemu_st_i128:
98
+ .name = "jump",
250
+ return C_O0_I3(o, m, r);
99
+ .test_code = noexec_1,
251
+
100
+ .test_len = noexec_end - noexec_1,
252
case INDEX_op_add_vec:
101
+ .page_ofs = noexec_1 - noexec_2,
253
case INDEX_op_sub_vec:
102
+ .entry_ofs = 0,
254
case INDEX_op_mul_vec:
103
+ .expected_si_ofs = 0,
104
+ .expected_pc_ofs = 0,
105
+ .expected_arg = 0,
106
+ },
107
+ {
108
+ .name = "fallthrough [cross]",
109
+ .test_code = noexec_1,
110
+ .test_len = noexec_end - noexec_1,
111
+ .page_ofs = noexec_1 - noexec_2 - 2,
112
+ .entry_ofs = noexec_1 - noexec_2 - 2,
113
+ .expected_si_ofs = 0,
114
+ .expected_pc_ofs = -2,
115
+ .expected_arg = 1,
116
+ },
117
+ {
118
+ .name = "jump [cross]",
119
+ .test_code = noexec_1,
120
+ .test_len = noexec_end - noexec_1,
121
+ .page_ofs = noexec_1 - noexec_2 - 2,
122
+ .entry_ofs = -2,
123
+ .expected_si_ofs = 0,
124
+ .expected_pc_ofs = -2,
125
+ .expected_arg = 0,
126
+ },
127
+ };
128
+
129
+ return test_noexec(noexec_tests,
130
+ sizeof(noexec_tests) / sizeof(noexec_tests[0]));
131
+}
132
diff --git a/tests/tcg/riscv64/Makefile.target b/tests/tcg/riscv64/Makefile.target
133
index XXXXXXX..XXXXXXX 100644
134
--- a/tests/tcg/riscv64/Makefile.target
135
+++ b/tests/tcg/riscv64/Makefile.target
136
@@ -XXX,XX +XXX,XX @@
137
138
VPATH += $(SRC_PATH)/tests/tcg/riscv64
139
TESTS += test-div
140
+TESTS += noexec
--
2.34.1
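On the HOST_BIG_ENDIAN ^ need_bswap selection in the ppc patch above: a small illustration, not part of the patch, of the byte-reversed store pair that the STDBRX sequence produces for a little-endian 16-byte MemOp on the big-endian PPC host.

    #include <stdint.h>

    /* Store a 128-bit guest value (given as two host-endian 64-bit
     * halves) in little-endian byte order on a big-endian host: the
     * low half goes to bytes 0..7 and the high half to bytes 8..15,
     * each byte-reversed.  This is only correct on a big-endian host,
     * matching the case the generated STDBRX pair handles. */
    static inline void st16_le_guest_be_host(uint64_t *p,
                                             uint64_t lo, uint64_t hi)
    {
        p[0] = __builtin_bswap64(lo);
        p[1] = __builtin_bswap64(hi);
    }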
Use LPQ/STPQ when 16-byte atomicity is required.
Note that these instructions require 16-byte alignment.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/s390x/tcg-target-con-set.h |   2 +
 tcg/s390x/tcg-target.h         |   2 +-
 tcg/s390x/tcg-target.c.inc     | 103 ++++++++++++++++++++++++++++++++-
 3 files changed, 103 insertions(+), 4 deletions(-)

From: Ilya Leoshkevich <iii@linux.ibm.com>

Introduce a function that checks whether a given address is on the same
page as where disassembly started. Having it improves readability of
the following patches.

Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20220811095534.241224-3-iii@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
[rth: Make the DisasContextBase parameter const.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/translator.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/exec/translator.h b/include/exec/translator.h
12
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
18
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
19
--- a/include/exec/translator.h
14
--- a/tcg/s390x/tcg-target-con-set.h
20
+++ b/include/exec/translator.h
15
+++ b/tcg/s390x/tcg-target-con-set.h
21
@@ -XXX,XX +XXX,XX @@ FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
16
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r)
22
17
C_O0_I2(r, ri)
23
#undef GEN_TRANSLATOR_LD
18
C_O0_I2(r, rA)
24
19
C_O0_I2(v, r)
25
+/*
20
+C_O0_I3(o, m, r)
26
+ * Return whether addr is on the same page as where disassembly started.
21
C_O1_I1(r, r)
27
+ * Translators can use this to enforce the rule that only single-insn
22
C_O1_I1(v, r)
28
+ * translation blocks are allowed to cross page boundaries.
23
C_O1_I1(v, v)
29
+ */
24
@@ -XXX,XX +XXX,XX @@ C_O1_I2(v, v, v)
30
+static inline bool is_same_page(const DisasContextBase *db, target_ulong addr)
25
C_O1_I3(v, v, v, v)
26
C_O1_I4(r, r, ri, rI, r)
27
C_O1_I4(r, r, rA, rI, r)
28
+C_O2_I1(o, m, r)
29
C_O2_I2(o, m, 0, r)
30
C_O2_I2(o, m, r, r)
31
C_O2_I3(o, m, 0, 1, r)
32
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/s390x/tcg-target.h
35
+++ b/tcg/s390x/tcg-target.h
36
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
37
#define TCG_TARGET_HAS_muluh_i64 0
38
#define TCG_TARGET_HAS_mulsh_i64 0
39
40
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
41
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
42
43
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
44
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
45
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/s390x/tcg-target.c.inc
48
+++ b/tcg/s390x/tcg-target.c.inc
49
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
50
RXY_LLGF = 0xe316,
51
RXY_LLGH = 0xe391,
52
RXY_LMG = 0xeb04,
53
+ RXY_LPQ = 0xe38f,
54
RXY_LRV = 0xe31e,
55
RXY_LRVG = 0xe30f,
56
RXY_LRVH = 0xe31f,
57
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
58
RXY_STG = 0xe324,
59
RXY_STHY = 0xe370,
60
RXY_STMG = 0xeb24,
61
+ RXY_STPQ = 0xe38e,
62
RXY_STRV = 0xe33e,
63
RXY_STRVG = 0xe32f,
64
RXY_STRVH = 0xe33f,
65
@@ -XXX,XX +XXX,XX @@ typedef struct {
66
67
bool tcg_target_has_memory_bswap(MemOp memop)
68
{
69
- return true;
70
+ TCGAtomAlign aa;
71
+
72
+ if ((memop & MO_SIZE) <= MO_64) {
73
+ return true;
74
+ }
75
+
76
+ /*
77
+ * Reject 16-byte memop with 16-byte atomicity,
78
+ * but do allow a pair of 64-bit operations.
79
+ */
80
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
81
+ return aa.atom <= MO_64;
82
}
83
84
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
85
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
86
{
87
TCGLabelQemuLdst *ldst = NULL;
88
MemOp opc = get_memop(oi);
89
+ MemOp s_bits = opc & MO_SIZE;
90
unsigned a_mask;
91
92
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
93
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
94
a_mask = (1 << h->aa.align) - 1;
95
96
#ifdef CONFIG_SOFTMMU
97
- unsigned s_bits = opc & MO_SIZE;
98
unsigned s_mask = (1 << s_bits) - 1;
99
int mem_index = get_mmuidx(oi);
100
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
101
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
102
}
103
}
104
105
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
106
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
31
+{
107
+{
32
+ return ((addr ^ db->pc_first) & TARGET_PAGE_MASK) == 0;
108
+ TCGLabel *l1 = NULL, *l2 = NULL;
109
+ TCGLabelQemuLdst *ldst;
110
+ HostAddress h;
111
+ bool need_bswap;
112
+ bool use_pair;
113
+ S390Opcode insn;
114
+
115
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
116
+
117
+ use_pair = h.aa.atom < MO_128;
118
+ need_bswap = get_memop(oi) & MO_BSWAP;
119
+
120
+ if (!use_pair) {
121
+ /*
122
+ * Atomicity requires we use LPQ. If we've already checked for
123
+ * 16-byte alignment, that's all we need. If we arrive with
124
+ * lesser alignment, we have determined that less than 16-byte
125
+ * alignment can be satisfied with two 8-byte loads.
126
+ */
127
+ if (h.aa.align < MO_128) {
128
+ use_pair = true;
129
+ l1 = gen_new_label();
130
+ l2 = gen_new_label();
131
+
132
+ tcg_out_insn(s, RI, TMLL, addr_reg, 15);
133
+ tgen_branch(s, 7, l1); /* CC in {1,2,3} */
134
+ }
135
+
136
+ tcg_debug_assert(!need_bswap);
137
+ tcg_debug_assert(datalo & 1);
138
+ tcg_debug_assert(datahi == datalo - 1);
139
+ insn = is_ld ? RXY_LPQ : RXY_STPQ;
140
+ tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);
141
+
142
+ if (use_pair) {
143
+ tgen_branch(s, S390_CC_ALWAYS, l2);
144
+ tcg_out_label(s, l1);
145
+ }
146
+ }
147
+ if (use_pair) {
148
+ TCGReg d1, d2;
149
+
150
+ if (need_bswap) {
151
+ d1 = datalo, d2 = datahi;
152
+ insn = is_ld ? RXY_LRVG : RXY_STRVG;
153
+ } else {
154
+ d1 = datahi, d2 = datalo;
155
+ insn = is_ld ? RXY_LG : RXY_STG;
156
+ }
157
+
158
+ if (h.base == d1 || h.index == d1) {
159
+ tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
160
+ h.base = TCG_TMP0;
161
+ h.index = TCG_REG_NONE;
162
+ h.disp = 0;
163
+ }
164
+ tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
165
+ tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
166
+ }
167
+ if (l2) {
168
+ tcg_out_label(s, l2);
169
+ }
170
+
171
+ if (ldst) {
172
+ ldst->type = TCG_TYPE_I128;
173
+ ldst->datalo_reg = datalo;
174
+ ldst->datahi_reg = datahi;
175
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
176
+ }
33
+}
177
+}
34
+
178
+
35
#endif /* EXEC__TRANSLATOR_H */
179
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
180
{
181
/* Reuse the zeroing that exists for goto_ptr. */
182
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
183
case INDEX_op_qemu_st_i64:
184
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
185
break;
186
+ case INDEX_op_qemu_ld_i128:
187
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
188
+ break;
189
+ case INDEX_op_qemu_st_i128:
190
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
191
+ break;
192
193
case INDEX_op_ld16s_i64:
194
tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
195
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
196
case INDEX_op_qemu_st_i64:
197
case INDEX_op_qemu_st_i32:
198
return C_O0_I2(r, r);
199
+ case INDEX_op_qemu_ld_i128:
200
+ return C_O2_I1(o, m, r);
201
+ case INDEX_op_qemu_st_i128:
202
+ return C_O0_I3(o, m, r);
203
204
case INDEX_op_deposit_i32:
205
case INDEX_op_deposit_i64:
--
2.34.1
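A standalone restatement, not from either series, of the check that is_same_page() performs: two addresses lie on the same page exactly when their XOR has no bits set above the page offset. A 4 KiB page size is assumed here; TARGET_PAGE_MASK is target-dependent in QEMU.

    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_BITS 12                               /* assumed 4 KiB pages */
    #define PAGE_MASK (~(((uint64_t)1 << PAGE_BITS) - 1))

    /* Same page iff the page-number bits are identical, i.e. the XOR of
     * the two addresses is confined to the page-offset bits. */
    static bool same_page(uint64_t a, uint64_t b)
    {
        return ((a ^ b) & PAGE_MASK) == 0;
    }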
New patch
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/tcg-op-ldst.c | 1006 +++++++++++++++++++++++++++++++++++++++++++++
5
tcg/tcg-op.c | 974 -------------------------------------------
6
tcg/meson.build | 1 +
7
3 files changed, 1007 insertions(+), 974 deletions(-)
8
create mode 100644 tcg/tcg-op-ldst.c
1
9
10
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
11
new file mode 100644
12
index XXXXXXX..XXXXXXX
13
--- /dev/null
14
+++ b/tcg/tcg-op-ldst.c
15
@@ -XXX,XX +XXX,XX @@
16
+/*
17
+ * Tiny Code Generator for QEMU
18
+ *
19
+ * Copyright (c) 2008 Fabrice Bellard
20
+ *
21
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
22
+ * of this software and associated documentation files (the "Software"), to deal
23
+ * in the Software without restriction, including without limitation the rights
24
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25
+ * copies of the Software, and to permit persons to whom the Software is
26
+ * furnished to do so, subject to the following conditions:
27
+ *
28
+ * The above copyright notice and this permission notice shall be included in
29
+ * all copies or substantial portions of the Software.
30
+ *
31
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
34
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
37
+ * THE SOFTWARE.
38
+ */
39
+
40
+#include "qemu/osdep.h"
41
+#include "exec/exec-all.h"
42
+#include "tcg/tcg.h"
43
+#include "tcg/tcg-temp-internal.h"
44
+#include "tcg/tcg-op.h"
45
+#include "tcg/tcg-mo.h"
46
+#include "exec/plugin-gen.h"
47
+#include "tcg-internal.h"
48
+
49
+
50
+static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
51
+{
52
+ /* Trigger the asserts within as early as possible. */
53
+ unsigned a_bits = get_alignment_bits(op);
54
+
55
+ /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
56
+ if (a_bits == (op & MO_SIZE)) {
57
+ op = (op & ~MO_AMASK) | MO_ALIGN;
58
+ }
59
+
60
+ switch (op & MO_SIZE) {
61
+ case MO_8:
62
+ op &= ~MO_BSWAP;
63
+ break;
64
+ case MO_16:
65
+ break;
66
+ case MO_32:
67
+ if (!is64) {
68
+ op &= ~MO_SIGN;
69
+ }
70
+ break;
71
+ case MO_64:
72
+ if (is64) {
73
+ op &= ~MO_SIGN;
74
+ break;
75
+ }
76
+ /* fall through */
77
+ default:
78
+ g_assert_not_reached();
79
+ }
80
+ if (st) {
81
+ op &= ~MO_SIGN;
82
+ }
83
+ return op;
84
+}
85
+
86
+static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
87
+ MemOp memop, TCGArg idx)
88
+{
89
+ MemOpIdx oi = make_memop_idx(memop, idx);
90
+#if TARGET_LONG_BITS == 32
91
+ tcg_gen_op3i_i32(opc, val, addr, oi);
92
+#else
93
+ if (TCG_TARGET_REG_BITS == 32) {
94
+ tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
95
+ } else {
96
+ tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
97
+ }
98
+#endif
99
+}
100
+
101
+static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
102
+ MemOp memop, TCGArg idx)
103
+{
104
+ MemOpIdx oi = make_memop_idx(memop, idx);
105
+#if TARGET_LONG_BITS == 32
106
+ if (TCG_TARGET_REG_BITS == 32) {
107
+ tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
108
+ } else {
109
+ tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
110
+ }
111
+#else
112
+ if (TCG_TARGET_REG_BITS == 32) {
113
+ tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
114
+ TCGV_LOW(addr), TCGV_HIGH(addr), oi);
115
+ } else {
116
+ tcg_gen_op3i_i64(opc, val, addr, oi);
117
+ }
118
+#endif
119
+}
120
+
121
+static void tcg_gen_req_mo(TCGBar type)
122
+{
123
+#ifdef TCG_GUEST_DEFAULT_MO
124
+ type &= TCG_GUEST_DEFAULT_MO;
125
+#endif
126
+ type &= ~TCG_TARGET_DEFAULT_MO;
127
+ if (type) {
128
+ tcg_gen_mb(type | TCG_BAR_SC);
129
+ }
130
+}
131
+
132
+static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
133
+{
134
+#ifdef CONFIG_PLUGIN
135
+ if (tcg_ctx->plugin_insn != NULL) {
136
+ /* Save a copy of the vaddr for use after a load. */
137
+ TCGv temp = tcg_temp_new();
138
+ tcg_gen_mov_tl(temp, vaddr);
139
+ return temp;
140
+ }
141
+#endif
142
+ return vaddr;
143
+}
144
+
145
+static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
146
+ enum qemu_plugin_mem_rw rw)
147
+{
148
+#ifdef CONFIG_PLUGIN
149
+ if (tcg_ctx->plugin_insn != NULL) {
150
+ qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
151
+ plugin_gen_empty_mem_callback(vaddr, info);
152
+ tcg_temp_free(vaddr);
153
+ }
154
+#endif
155
+}
156
+
157
+void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
158
+{
159
+ MemOp orig_memop;
160
+ MemOpIdx oi;
161
+
162
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
163
+ memop = tcg_canonicalize_memop(memop, 0, 0);
164
+ oi = make_memop_idx(memop, idx);
165
+
166
+ orig_memop = memop;
167
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
168
+ memop &= ~MO_BSWAP;
169
+ /* The bswap primitive benefits from zero-extended input. */
170
+ if ((memop & MO_SSIZE) == MO_SW) {
171
+ memop &= ~MO_SIGN;
172
+ }
173
+ }
174
+
175
+ addr = plugin_prep_mem_callbacks(addr);
176
+ gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
177
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
178
+
179
+ if ((orig_memop ^ memop) & MO_BSWAP) {
180
+ switch (orig_memop & MO_SIZE) {
181
+ case MO_16:
182
+ tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
183
+ ? TCG_BSWAP_IZ | TCG_BSWAP_OS
184
+ : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
185
+ break;
186
+ case MO_32:
187
+ tcg_gen_bswap32_i32(val, val);
188
+ break;
189
+ default:
190
+ g_assert_not_reached();
191
+ }
192
+ }
193
+}
194
+
195
+void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
196
+{
197
+ TCGv_i32 swap = NULL;
198
+ MemOpIdx oi;
199
+
200
+ tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
201
+ memop = tcg_canonicalize_memop(memop, 0, 1);
202
+ oi = make_memop_idx(memop, idx);
203
+
204
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
205
+ swap = tcg_temp_ebb_new_i32();
206
+ switch (memop & MO_SIZE) {
207
+ case MO_16:
208
+ tcg_gen_bswap16_i32(swap, val, 0);
209
+ break;
210
+ case MO_32:
211
+ tcg_gen_bswap32_i32(swap, val);
212
+ break;
213
+ default:
214
+ g_assert_not_reached();
215
+ }
216
+ val = swap;
217
+ memop &= ~MO_BSWAP;
218
+ }
219
+
220
+ addr = plugin_prep_mem_callbacks(addr);
221
+ if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
222
+ gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
223
+ } else {
224
+ gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
225
+ }
226
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
227
+
228
+ if (swap) {
229
+ tcg_temp_free_i32(swap);
230
+ }
231
+}
232
+
233
+void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
234
+{
235
+ MemOp orig_memop;
236
+ MemOpIdx oi;
237
+
238
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
239
+ tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
240
+ if (memop & MO_SIGN) {
241
+ tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
242
+ } else {
243
+ tcg_gen_movi_i32(TCGV_HIGH(val), 0);
244
+ }
245
+ return;
246
+ }
247
+
248
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
249
+ memop = tcg_canonicalize_memop(memop, 1, 0);
250
+ oi = make_memop_idx(memop, idx);
251
+
252
+ orig_memop = memop;
253
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
254
+ memop &= ~MO_BSWAP;
255
+ /* The bswap primitive benefits from zero-extended input. */
256
+ if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
257
+ memop &= ~MO_SIGN;
258
+ }
259
+ }
260
+
261
+ addr = plugin_prep_mem_callbacks(addr);
262
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
263
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
264
+
265
+ if ((orig_memop ^ memop) & MO_BSWAP) {
266
+ int flags = (orig_memop & MO_SIGN
267
+ ? TCG_BSWAP_IZ | TCG_BSWAP_OS
268
+ : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
269
+ switch (orig_memop & MO_SIZE) {
270
+ case MO_16:
271
+ tcg_gen_bswap16_i64(val, val, flags);
272
+ break;
273
+ case MO_32:
274
+ tcg_gen_bswap32_i64(val, val, flags);
275
+ break;
276
+ case MO_64:
277
+ tcg_gen_bswap64_i64(val, val);
278
+ break;
279
+ default:
280
+ g_assert_not_reached();
281
+ }
282
+ }
283
+}
284
+
285
+void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
286
+{
287
+ TCGv_i64 swap = NULL;
288
+ MemOpIdx oi;
289
+
290
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
291
+ tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
292
+ return;
293
+ }
294
+
295
+ tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
296
+ memop = tcg_canonicalize_memop(memop, 1, 1);
297
+ oi = make_memop_idx(memop, idx);
298
+
299
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
300
+ swap = tcg_temp_ebb_new_i64();
301
+ switch (memop & MO_SIZE) {
302
+ case MO_16:
303
+ tcg_gen_bswap16_i64(swap, val, 0);
304
+ break;
305
+ case MO_32:
306
+ tcg_gen_bswap32_i64(swap, val, 0);
307
+ break;
308
+ case MO_64:
309
+ tcg_gen_bswap64_i64(swap, val);
310
+ break;
311
+ default:
312
+ g_assert_not_reached();
313
+ }
314
+ val = swap;
315
+ memop &= ~MO_BSWAP;
316
+ }
317
+
318
+ addr = plugin_prep_mem_callbacks(addr);
319
+ gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
320
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
321
+
322
+ if (swap) {
323
+ tcg_temp_free_i64(swap);
324
+ }
325
+}
326
+
327
+/*
328
+ * Return true if @mop, without knowledge of the pointer alignment,
329
+ * does not require 16-byte atomicity, and it would be advantageous
330
+ * to avoid a call to a helper function.
331
+ */
332
+static bool use_two_i64_for_i128(MemOp mop)
333
+{
334
+#ifdef CONFIG_SOFTMMU
335
+ /* Two softmmu tlb lookups is larger than one function call. */
336
+ return false;
337
+#else
338
+ /*
339
+ * For user-only, two 64-bit operations may well be smaller than a call.
340
+ * Determine if that would be legal for the requested atomicity.
341
+ */
342
+ switch (mop & MO_ATOM_MASK) {
343
+ case MO_ATOM_NONE:
344
+ case MO_ATOM_IFALIGN_PAIR:
345
+ return true;
346
+ case MO_ATOM_IFALIGN:
347
+ case MO_ATOM_SUBALIGN:
348
+ case MO_ATOM_WITHIN16:
349
+ case MO_ATOM_WITHIN16_PAIR:
350
+ /* In a serialized context, no atomicity is required. */
351
+ return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
352
+ default:
353
+ g_assert_not_reached();
354
+ }
355
+#endif
356
+}
357
+
358
+static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
359
+{
360
+ MemOp mop_1 = orig, mop_2;
361
+
362
+ tcg_debug_assert((orig & MO_SIZE) == MO_128);
363
+ tcg_debug_assert((orig & MO_SIGN) == 0);
364
+
365
+ /* Reduce the size to 64-bit. */
366
+ mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
367
+
368
+ /* Retain the alignment constraints of the original. */
369
+ switch (orig & MO_AMASK) {
370
+ case MO_UNALN:
371
+ case MO_ALIGN_2:
372
+ case MO_ALIGN_4:
373
+ mop_2 = mop_1;
374
+ break;
375
+ case MO_ALIGN_8:
376
+ /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
377
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
378
+ mop_2 = mop_1;
379
+ break;
380
+ case MO_ALIGN:
381
+ /* Second has 8-byte alignment; first has 16-byte alignment. */
382
+ mop_2 = mop_1;
383
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
384
+ break;
385
+ case MO_ALIGN_16:
386
+ case MO_ALIGN_32:
387
+ case MO_ALIGN_64:
388
+ /* Second has 8-byte alignment; first retains original. */
389
+ mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
390
+ break;
391
+ default:
392
+ g_assert_not_reached();
393
+ }
394
+
395
+ /* Use a memory ordering implemented by the host. */
396
+ if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
397
+ mop_1 &= ~MO_BSWAP;
398
+ mop_2 &= ~MO_BSWAP;
399
+ }
400
+
401
+ ret[0] = mop_1;
402
+ ret[1] = mop_2;
403
+}
404
+
405
+#if TARGET_LONG_BITS == 64
406
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
407
+#else
408
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
409
+#endif
410
+
411
+void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
412
+{
413
+ const MemOpIdx oi = make_memop_idx(memop, idx);
414
+
415
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
416
+ tcg_debug_assert((memop & MO_SIGN) == 0);
417
+
418
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
419
+ addr = plugin_prep_mem_callbacks(addr);
420
+
421
+ /* TODO: For now, force 32-bit hosts to use the helper. */
422
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
423
+ TCGv_i64 lo, hi;
424
+ TCGArg addr_arg;
425
+ MemOpIdx adj_oi;
426
+ bool need_bswap = false;
427
+
428
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
429
+ lo = TCGV128_HIGH(val);
430
+ hi = TCGV128_LOW(val);
431
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
432
+ need_bswap = true;
433
+ } else {
434
+ lo = TCGV128_LOW(val);
435
+ hi = TCGV128_HIGH(val);
436
+ adj_oi = oi;
437
+ }
438
+
439
+#if TARGET_LONG_BITS == 32
440
+ addr_arg = tcgv_i32_arg(addr);
441
+#else
442
+ addr_arg = tcgv_i64_arg(addr);
443
+#endif
444
+ tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
445
+
446
+ if (need_bswap) {
447
+ tcg_gen_bswap64_i64(lo, lo);
448
+ tcg_gen_bswap64_i64(hi, hi);
449
+ }
450
+ } else if (use_two_i64_for_i128(memop)) {
451
+ MemOp mop[2];
452
+ TCGv addr_p8;
453
+ TCGv_i64 x, y;
454
+
455
+ canonicalize_memop_i128_as_i64(mop, memop);
456
+
457
+ /*
458
+ * Since there are no global TCGv_i128, there is no visible state
459
+ * changed if the second load faults. Load directly into the two
460
+ * subwords.
461
+ */
462
+ if ((memop & MO_BSWAP) == MO_LE) {
463
+ x = TCGV128_LOW(val);
464
+ y = TCGV128_HIGH(val);
465
+ } else {
466
+ x = TCGV128_HIGH(val);
467
+ y = TCGV128_LOW(val);
468
+ }
469
+
470
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
471
+
472
+ if ((mop[0] ^ memop) & MO_BSWAP) {
473
+ tcg_gen_bswap64_i64(x, x);
474
+ }
475
+
476
+ addr_p8 = tcg_temp_ebb_new();
477
+ tcg_gen_addi_tl(addr_p8, addr, 8);
478
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
479
+ tcg_temp_free(addr_p8);
480
+
481
+ if ((mop[0] ^ memop) & MO_BSWAP) {
482
+ tcg_gen_bswap64_i64(y, y);
483
+ }
484
+ } else {
485
+ gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
486
+ }
487
+
488
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
489
+}
490
+
491
+void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
492
+{
493
+ const MemOpIdx oi = make_memop_idx(memop, idx);
494
+
495
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
496
+ tcg_debug_assert((memop & MO_SIGN) == 0);
497
+
498
+ tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
499
+ addr = plugin_prep_mem_callbacks(addr);
500
+
501
+ /* TODO: For now, force 32-bit hosts to use the helper. */
502
+
503
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
504
+ TCGv_i64 lo, hi;
505
+ TCGArg addr_arg;
506
+ MemOpIdx adj_oi;
507
+ bool need_bswap = false;
508
+
509
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
510
+ lo = tcg_temp_new_i64();
511
+ hi = tcg_temp_new_i64();
512
+ tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
513
+ tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
514
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
515
+ need_bswap = true;
516
+ } else {
517
+ lo = TCGV128_LOW(val);
518
+ hi = TCGV128_HIGH(val);
519
+ adj_oi = oi;
520
+ }
521
+
522
+#if TARGET_LONG_BITS == 32
523
+ addr_arg = tcgv_i32_arg(addr);
524
+#else
525
+ addr_arg = tcgv_i64_arg(addr);
526
+#endif
527
+ tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
528
+
529
+ if (need_bswap) {
530
+ tcg_temp_free_i64(lo);
531
+ tcg_temp_free_i64(hi);
532
+ }
533
+ } else if (use_two_i64_for_i128(memop)) {
534
+ MemOp mop[2];
535
+ TCGv addr_p8;
536
+ TCGv_i64 x, y;
537
+
538
+ canonicalize_memop_i128_as_i64(mop, memop);
539
+
540
+ if ((memop & MO_BSWAP) == MO_LE) {
541
+ x = TCGV128_LOW(val);
542
+ y = TCGV128_HIGH(val);
543
+ } else {
544
+ x = TCGV128_HIGH(val);
545
+ y = TCGV128_LOW(val);
546
+ }
547
+
548
+ addr_p8 = tcg_temp_ebb_new();
549
+ if ((mop[0] ^ memop) & MO_BSWAP) {
550
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
551
+
552
+ tcg_gen_bswap64_i64(t, x);
553
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
554
+ tcg_gen_bswap64_i64(t, y);
555
+ tcg_gen_addi_tl(addr_p8, addr, 8);
556
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
557
+ tcg_temp_free_i64(t);
558
+ } else {
559
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
560
+ tcg_gen_addi_tl(addr_p8, addr, 8);
561
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
562
+ }
563
+ tcg_temp_free(addr_p8);
564
+ } else {
565
+ gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
566
+ }
567
+
568
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
569
+}
570
+
571
+static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
572
+{
573
+ switch (opc & MO_SSIZE) {
574
+ case MO_SB:
575
+ tcg_gen_ext8s_i32(ret, val);
576
+ break;
577
+ case MO_UB:
578
+ tcg_gen_ext8u_i32(ret, val);
579
+ break;
580
+ case MO_SW:
581
+ tcg_gen_ext16s_i32(ret, val);
582
+ break;
583
+ case MO_UW:
584
+ tcg_gen_ext16u_i32(ret, val);
585
+ break;
586
+ default:
587
+ tcg_gen_mov_i32(ret, val);
588
+ break;
589
+ }
590
+}
591
+
592
+static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
593
+{
594
+ switch (opc & MO_SSIZE) {
595
+ case MO_SB:
596
+ tcg_gen_ext8s_i64(ret, val);
597
+ break;
598
+ case MO_UB:
599
+ tcg_gen_ext8u_i64(ret, val);
600
+ break;
601
+ case MO_SW:
602
+ tcg_gen_ext16s_i64(ret, val);
603
+ break;
604
+ case MO_UW:
605
+ tcg_gen_ext16u_i64(ret, val);
606
+ break;
607
+ case MO_SL:
608
+ tcg_gen_ext32s_i64(ret, val);
609
+ break;
610
+ case MO_UL:
611
+ tcg_gen_ext32u_i64(ret, val);
612
+ break;
613
+ default:
614
+ tcg_gen_mov_i64(ret, val);
615
+ break;
616
+ }
617
+}
618
+
619
+typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
620
+ TCGv_i32, TCGv_i32, TCGv_i32);
621
+typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
622
+ TCGv_i64, TCGv_i64, TCGv_i32);
623
+typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
624
+ TCGv_i128, TCGv_i128, TCGv_i32);
625
+typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
626
+ TCGv_i32, TCGv_i32);
627
+typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
628
+ TCGv_i64, TCGv_i32);
629
+
630
+#ifdef CONFIG_ATOMIC64
631
+# define WITH_ATOMIC64(X) X,
632
+#else
633
+# define WITH_ATOMIC64(X)
634
+#endif
635
+#ifdef CONFIG_CMPXCHG128
636
+# define WITH_ATOMIC128(X) X,
637
+#else
638
+# define WITH_ATOMIC128(X)
639
+#endif
640
+
641
+static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
642
+ [MO_8] = gen_helper_atomic_cmpxchgb,
643
+ [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
644
+ [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
645
+ [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
646
+ [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
647
+ WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
648
+ WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
649
+ WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
650
+ WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
651
+};
652
+
653
+void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
654
+ TCGv_i32 newv, TCGArg idx, MemOp memop)
655
+{
656
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
657
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
658
+
659
+ tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
660
+
661
+ tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
662
+ tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
663
+ tcg_gen_qemu_st_i32(t2, addr, idx, memop);
664
+ tcg_temp_free_i32(t2);
665
+
666
+ if (memop & MO_SIGN) {
667
+ tcg_gen_ext_i32(retv, t1, memop);
668
+ } else {
669
+ tcg_gen_mov_i32(retv, t1);
670
+ }
671
+ tcg_temp_free_i32(t1);
672
+}
673
+
674
+void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
675
+ TCGv_i32 newv, TCGArg idx, MemOp memop)
676
+{
677
+ gen_atomic_cx_i32 gen;
678
+ MemOpIdx oi;
679
+
680
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
681
+ tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
682
+ return;
683
+ }
684
+
685
+ memop = tcg_canonicalize_memop(memop, 0, 0);
686
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
687
+ tcg_debug_assert(gen != NULL);
688
+
689
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
690
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
691
+
692
+ if (memop & MO_SIGN) {
693
+ tcg_gen_ext_i32(retv, retv, memop);
694
+ }
695
+}
696
+
697
+void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
698
+ TCGv_i64 newv, TCGArg idx, MemOp memop)
699
+{
700
+ TCGv_i64 t1, t2;
701
+
702
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
703
+ tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
704
+ TCGV_LOW(newv), idx, memop);
705
+ if (memop & MO_SIGN) {
706
+ tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
707
+ } else {
708
+ tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
709
+ }
710
+ return;
711
+ }
712
+
713
+ t1 = tcg_temp_ebb_new_i64();
714
+ t2 = tcg_temp_ebb_new_i64();
715
+
716
+ tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
717
+
718
+ tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
719
+ tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
720
+ tcg_gen_qemu_st_i64(t2, addr, idx, memop);
721
+ tcg_temp_free_i64(t2);
722
+
723
+ if (memop & MO_SIGN) {
724
+ tcg_gen_ext_i64(retv, t1, memop);
725
+ } else {
726
+ tcg_gen_mov_i64(retv, t1);
727
+ }
728
+ tcg_temp_free_i64(t1);
729
+}
730
+
731
+void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
732
+ TCGv_i64 newv, TCGArg idx, MemOp memop)
733
+{
734
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
735
+ tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
736
+ return;
737
+ }
738
+
739
+ if ((memop & MO_SIZE) == MO_64) {
740
+ gen_atomic_cx_i64 gen;
741
+
742
+ memop = tcg_canonicalize_memop(memop, 1, 0);
743
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
744
+ if (gen) {
745
+ MemOpIdx oi = make_memop_idx(memop, idx);
746
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
747
+ return;
748
+ }
749
+
750
+ gen_helper_exit_atomic(cpu_env);
751
+
752
+ /*
753
+ * Produce a result for a well-formed opcode stream. This satisfies
754
+ * liveness for set before used, which happens before this dead code
755
+ * is removed.
756
+ */
757
+ tcg_gen_movi_i64(retv, 0);
758
+ return;
759
+ }
760
+
761
+ if (TCG_TARGET_REG_BITS == 32) {
762
+ tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
763
+ TCGV_LOW(newv), idx, memop);
764
+ if (memop & MO_SIGN) {
765
+ tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
766
+ } else {
767
+ tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
768
+ }
769
+ } else {
770
+ TCGv_i32 c32 = tcg_temp_ebb_new_i32();
771
+ TCGv_i32 n32 = tcg_temp_ebb_new_i32();
772
+ TCGv_i32 r32 = tcg_temp_ebb_new_i32();
773
+
774
+ tcg_gen_extrl_i64_i32(c32, cmpv);
775
+ tcg_gen_extrl_i64_i32(n32, newv);
776
+ tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
777
+ tcg_temp_free_i32(c32);
778
+ tcg_temp_free_i32(n32);
779
+
780
+ tcg_gen_extu_i32_i64(retv, r32);
781
+ tcg_temp_free_i32(r32);
782
+
783
+ if (memop & MO_SIGN) {
784
+ tcg_gen_ext_i64(retv, retv, memop);
785
+ }
786
+ }
787
+}
788
+
789
+void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
790
+ TCGv_i128 newv, TCGArg idx, MemOp memop)
791
+{
792
+ if (TCG_TARGET_REG_BITS == 32) {
793
+ /* Inline expansion below is simply too large for 32-bit hosts. */
794
+ gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
795
+ ? gen_helper_nonatomic_cmpxchgo_le
796
+ : gen_helper_nonatomic_cmpxchgo_be);
797
+ MemOpIdx oi = make_memop_idx(memop, idx);
798
+
799
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
800
+ tcg_debug_assert((memop & MO_SIGN) == 0);
801
+
802
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
803
+ } else {
804
+ TCGv_i128 oldv = tcg_temp_ebb_new_i128();
805
+ TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
806
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
807
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
808
+ TCGv_i64 z = tcg_constant_i64(0);
809
+
810
+ tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
811
+
812
+ /* Compare i128 */
813
+ tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
814
+ tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
815
+ tcg_gen_or_i64(t0, t0, t1);
816
+
817
+ /* tmpv = equal ? newv : oldv */
818
+ tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
819
+ TCGV128_LOW(newv), TCGV128_LOW(oldv));
820
+ tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
821
+ TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
822
+
823
+ /* Unconditional writeback. */
824
+ tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
825
+ tcg_gen_mov_i128(retv, oldv);
826
+
827
+ tcg_temp_free_i64(t0);
828
+ tcg_temp_free_i64(t1);
829
+ tcg_temp_free_i128(tmpv);
830
+ tcg_temp_free_i128(oldv);
831
+ }
832
+}
833
+
834
+void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
835
+ TCGv_i128 newv, TCGArg idx, MemOp memop)
836
+{
837
+ gen_atomic_cx_i128 gen;
838
+
839
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
840
+ tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
841
+ return;
842
+ }
843
+
844
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
845
+ tcg_debug_assert((memop & MO_SIGN) == 0);
846
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
847
+
848
+ if (gen) {
849
+ MemOpIdx oi = make_memop_idx(memop, idx);
850
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
851
+ return;
852
+ }
853
+
854
+ gen_helper_exit_atomic(cpu_env);
855
+
856
+ /*
857
+ * Produce a result for a well-formed opcode stream. This satisfies
858
+ * liveness for set before used, which happens before this dead code
859
+ * is removed.
860
+ */
861
+ tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
862
+ tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
863
+}
864
+
865
+static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
866
+ TCGArg idx, MemOp memop, bool new_val,
867
+ void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
868
+{
869
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
870
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
871
+
872
+ memop = tcg_canonicalize_memop(memop, 0, 0);
873
+
874
+ tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
875
+ tcg_gen_ext_i32(t2, val, memop);
876
+ gen(t2, t1, t2);
877
+ tcg_gen_qemu_st_i32(t2, addr, idx, memop);
878
+
879
+ tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
880
+ tcg_temp_free_i32(t1);
881
+ tcg_temp_free_i32(t2);
882
+}
883
+
884
+static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
885
+ TCGArg idx, MemOp memop, void * const table[])
886
+{
887
+ gen_atomic_op_i32 gen;
888
+ MemOpIdx oi;
889
+
890
+ memop = tcg_canonicalize_memop(memop, 0, 0);
891
+
892
+ gen = table[memop & (MO_SIZE | MO_BSWAP)];
893
+ tcg_debug_assert(gen != NULL);
894
+
895
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
896
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
897
+
898
+ if (memop & MO_SIGN) {
899
+ tcg_gen_ext_i32(ret, ret, memop);
900
+ }
901
+}
902
+
903
+static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
904
+ TCGArg idx, MemOp memop, bool new_val,
905
+ void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
906
+{
907
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
908
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
909
+
910
+ memop = tcg_canonicalize_memop(memop, 1, 0);
911
+
912
+ tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
913
+ tcg_gen_ext_i64(t2, val, memop);
914
+ gen(t2, t1, t2);
915
+ tcg_gen_qemu_st_i64(t2, addr, idx, memop);
916
+
917
+ tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
918
+ tcg_temp_free_i64(t1);
919
+ tcg_temp_free_i64(t2);
920
+}
921
+
922
+static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
923
+ TCGArg idx, MemOp memop, void * const table[])
924
+{
925
+ memop = tcg_canonicalize_memop(memop, 1, 0);
926
+
927
+ if ((memop & MO_SIZE) == MO_64) {
928
+#ifdef CONFIG_ATOMIC64
929
+ gen_atomic_op_i64 gen;
930
+ MemOpIdx oi;
931
+
932
+ gen = table[memop & (MO_SIZE | MO_BSWAP)];
933
+ tcg_debug_assert(gen != NULL);
934
+
935
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
936
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
937
+#else
938
+ gen_helper_exit_atomic(cpu_env);
939
+ /* Produce a result, so that we have a well-formed opcode stream
940
+ with respect to uses of the result in the (dead) code following. */
941
+ tcg_gen_movi_i64(ret, 0);
942
+#endif /* CONFIG_ATOMIC64 */
943
+ } else {
944
+ TCGv_i32 v32 = tcg_temp_ebb_new_i32();
945
+ TCGv_i32 r32 = tcg_temp_ebb_new_i32();
946
+
947
+ tcg_gen_extrl_i64_i32(v32, val);
948
+ do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
949
+ tcg_temp_free_i32(v32);
950
+
951
+ tcg_gen_extu_i32_i64(ret, r32);
952
+ tcg_temp_free_i32(r32);
953
+
954
+ if (memop & MO_SIGN) {
955
+ tcg_gen_ext_i64(ret, ret, memop);
956
+ }
957
+ }
958
+}
959
+
960
+#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
961
+static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
962
+ [MO_8] = gen_helper_atomic_##NAME##b, \
963
+ [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
964
+ [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
965
+ [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \
966
+ [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \
967
+ WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
968
+ WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
969
+}; \
970
+void tcg_gen_atomic_##NAME##_i32 \
971
+ (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
972
+{ \
973
+ if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
974
+ do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
975
+ } else { \
976
+ do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \
977
+ tcg_gen_##OP##_i32); \
978
+ } \
979
+} \
980
+void tcg_gen_atomic_##NAME##_i64 \
981
+ (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
982
+{ \
983
+ if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
984
+ do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
985
+ } else { \
986
+ do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \
987
+ tcg_gen_##OP##_i64); \
988
+ } \
989
+}
990
+
991
+GEN_ATOMIC_HELPER(fetch_add, add, 0)
992
+GEN_ATOMIC_HELPER(fetch_and, and, 0)
993
+GEN_ATOMIC_HELPER(fetch_or, or, 0)
994
+GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
995
+GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
996
+GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
997
+GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
998
+GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
999
+
1000
+GEN_ATOMIC_HELPER(add_fetch, add, 1)
1001
+GEN_ATOMIC_HELPER(and_fetch, and, 1)
1002
+GEN_ATOMIC_HELPER(or_fetch, or, 1)
1003
+GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
1004
+GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
1005
+GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
1006
+GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
1007
+GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
1008
+
1009
+static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
1010
+{
1011
+ tcg_gen_mov_i32(r, b);
1012
+}
1013
+
1014
+static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
1015
+{
1016
+ tcg_gen_mov_i64(r, b);
1017
+}
1018
+
1019
+GEN_ATOMIC_HELPER(xchg, mov2, 0)
1020
+
1021
+#undef GEN_ATOMIC_HELPER
1022
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
1023
index XXXXXXX..XXXXXXX 100644
1024
--- a/tcg/tcg-op.c
1025
+++ b/tcg/tcg-op.c
1026
@@ -XXX,XX +XXX,XX @@
1027
#include "tcg/tcg.h"
1028
#include "tcg/tcg-temp-internal.h"
1029
#include "tcg/tcg-op.h"
1030
-#include "tcg/tcg-mo.h"
1031
#include "exec/plugin-gen.h"
1032
#include "tcg-internal.h"
1033
1034
@@ -XXX,XX +XXX,XX @@ void tcg_gen_lookup_and_goto_ptr(void)
1035
tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
1036
tcg_temp_free_ptr(ptr);
1037
}
1038
-
1039
-static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
1040
-{
1041
- /* Trigger the asserts within as early as possible. */
1042
- unsigned a_bits = get_alignment_bits(op);
1043
-
1044
- /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
1045
- if (a_bits == (op & MO_SIZE)) {
1046
- op = (op & ~MO_AMASK) | MO_ALIGN;
1047
- }
1048
-
1049
- switch (op & MO_SIZE) {
1050
- case MO_8:
1051
- op &= ~MO_BSWAP;
1052
- break;
1053
- case MO_16:
1054
- break;
1055
- case MO_32:
1056
- if (!is64) {
1057
- op &= ~MO_SIGN;
1058
- }
1059
- break;
1060
- case MO_64:
1061
- if (is64) {
1062
- op &= ~MO_SIGN;
1063
- break;
1064
- }
1065
- /* fall through */
1066
- default:
1067
- g_assert_not_reached();
1068
- }
1069
- if (st) {
1070
- op &= ~MO_SIGN;
1071
- }
1072
- return op;
1073
-}
1074
-
1075
-static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
1076
- MemOp memop, TCGArg idx)
1077
-{
1078
- MemOpIdx oi = make_memop_idx(memop, idx);
1079
-#if TARGET_LONG_BITS == 32
1080
- tcg_gen_op3i_i32(opc, val, addr, oi);
1081
-#else
1082
- if (TCG_TARGET_REG_BITS == 32) {
1083
- tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
1084
- } else {
1085
- tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
1086
- }
1087
-#endif
1088
-}
1089
-
1090
-static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
1091
- MemOp memop, TCGArg idx)
1092
-{
1093
- MemOpIdx oi = make_memop_idx(memop, idx);
1094
-#if TARGET_LONG_BITS == 32
1095
- if (TCG_TARGET_REG_BITS == 32) {
1096
- tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
1097
- } else {
1098
- tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
1099
- }
1100
-#else
1101
- if (TCG_TARGET_REG_BITS == 32) {
1102
- tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
1103
- TCGV_LOW(addr), TCGV_HIGH(addr), oi);
1104
- } else {
1105
- tcg_gen_op3i_i64(opc, val, addr, oi);
1106
- }
1107
-#endif
1108
-}
1109
-
1110
-static void tcg_gen_req_mo(TCGBar type)
1111
-{
1112
-#ifdef TCG_GUEST_DEFAULT_MO
1113
- type &= TCG_GUEST_DEFAULT_MO;
1114
-#endif
1115
- type &= ~TCG_TARGET_DEFAULT_MO;
1116
- if (type) {
1117
- tcg_gen_mb(type | TCG_BAR_SC);
1118
- }
1119
-}
1120
-
1121
-static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
1122
-{
1123
-#ifdef CONFIG_PLUGIN
1124
- if (tcg_ctx->plugin_insn != NULL) {
1125
- /* Save a copy of the vaddr for use after a load. */
1126
- TCGv temp = tcg_temp_new();
1127
- tcg_gen_mov_tl(temp, vaddr);
1128
- return temp;
1129
- }
1130
-#endif
1131
- return vaddr;
1132
-}
1133
-
1134
-static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
1135
- enum qemu_plugin_mem_rw rw)
1136
-{
1137
-#ifdef CONFIG_PLUGIN
1138
- if (tcg_ctx->plugin_insn != NULL) {
1139
- qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
1140
- plugin_gen_empty_mem_callback(vaddr, info);
1141
- tcg_temp_free(vaddr);
1142
- }
1143
-#endif
1144
-}
1145
-
1146
-void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
1147
-{
1148
- MemOp orig_memop;
1149
- MemOpIdx oi;
1150
-
1151
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1152
- memop = tcg_canonicalize_memop(memop, 0, 0);
1153
- oi = make_memop_idx(memop, idx);
1154
-
1155
- orig_memop = memop;
1156
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1157
- memop &= ~MO_BSWAP;
1158
- /* The bswap primitive benefits from zero-extended input. */
1159
- if ((memop & MO_SSIZE) == MO_SW) {
1160
- memop &= ~MO_SIGN;
1161
- }
1162
- }
1163
-
1164
- addr = plugin_prep_mem_callbacks(addr);
1165
- gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
1166
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
1167
-
1168
- if ((orig_memop ^ memop) & MO_BSWAP) {
1169
- switch (orig_memop & MO_SIZE) {
1170
- case MO_16:
1171
- tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
1172
- ? TCG_BSWAP_IZ | TCG_BSWAP_OS
1173
- : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
1174
- break;
1175
- case MO_32:
1176
- tcg_gen_bswap32_i32(val, val);
1177
- break;
1178
- default:
1179
- g_assert_not_reached();
1180
- }
1181
- }
1182
-}
1183
-
1184
-void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
1185
-{
1186
- TCGv_i32 swap = NULL;
1187
- MemOpIdx oi;
1188
-
1189
- tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1190
- memop = tcg_canonicalize_memop(memop, 0, 1);
1191
- oi = make_memop_idx(memop, idx);
1192
-
1193
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1194
- swap = tcg_temp_ebb_new_i32();
1195
- switch (memop & MO_SIZE) {
1196
- case MO_16:
1197
- tcg_gen_bswap16_i32(swap, val, 0);
1198
- break;
1199
- case MO_32:
1200
- tcg_gen_bswap32_i32(swap, val);
1201
- break;
1202
- default:
1203
- g_assert_not_reached();
1204
- }
1205
- val = swap;
1206
- memop &= ~MO_BSWAP;
1207
- }
1208
-
1209
- addr = plugin_prep_mem_callbacks(addr);
1210
- if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
1211
- gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
1212
- } else {
1213
- gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
1214
- }
1215
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
1216
-
1217
- if (swap) {
1218
- tcg_temp_free_i32(swap);
1219
- }
1220
-}
1221
-
1222
-void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
1223
-{
1224
- MemOp orig_memop;
1225
- MemOpIdx oi;
1226
-
1227
- if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
1228
- tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
1229
- if (memop & MO_SIGN) {
1230
- tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
1231
- } else {
1232
- tcg_gen_movi_i32(TCGV_HIGH(val), 0);
1233
- }
1234
- return;
1235
- }
1236
-
1237
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1238
- memop = tcg_canonicalize_memop(memop, 1, 0);
1239
- oi = make_memop_idx(memop, idx);
1240
-
1241
- orig_memop = memop;
1242
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1243
- memop &= ~MO_BSWAP;
1244
- /* The bswap primitive benefits from zero-extended input. */
1245
- if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
1246
- memop &= ~MO_SIGN;
1247
- }
1248
- }
1249
-
1250
- addr = plugin_prep_mem_callbacks(addr);
1251
- gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
1252
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
1253
-
1254
- if ((orig_memop ^ memop) & MO_BSWAP) {
1255
- int flags = (orig_memop & MO_SIGN
1256
- ? TCG_BSWAP_IZ | TCG_BSWAP_OS
1257
- : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
1258
- switch (orig_memop & MO_SIZE) {
1259
- case MO_16:
1260
- tcg_gen_bswap16_i64(val, val, flags);
1261
- break;
1262
- case MO_32:
1263
- tcg_gen_bswap32_i64(val, val, flags);
1264
- break;
1265
- case MO_64:
1266
- tcg_gen_bswap64_i64(val, val);
1267
- break;
1268
- default:
1269
- g_assert_not_reached();
1270
- }
1271
- }
1272
-}
1273
-
1274
-void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
1275
-{
1276
- TCGv_i64 swap = NULL;
1277
- MemOpIdx oi;
1278
-
1279
- if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
1280
- tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
1281
- return;
1282
- }
1283
-
1284
- tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1285
- memop = tcg_canonicalize_memop(memop, 1, 1);
1286
- oi = make_memop_idx(memop, idx);
1287
-
1288
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1289
- swap = tcg_temp_ebb_new_i64();
1290
- switch (memop & MO_SIZE) {
1291
- case MO_16:
1292
- tcg_gen_bswap16_i64(swap, val, 0);
1293
- break;
1294
- case MO_32:
1295
- tcg_gen_bswap32_i64(swap, val, 0);
1296
- break;
1297
- case MO_64:
1298
- tcg_gen_bswap64_i64(swap, val);
1299
- break;
1300
- default:
1301
- g_assert_not_reached();
1302
- }
1303
- val = swap;
1304
- memop &= ~MO_BSWAP;
1305
- }
1306
-
1307
- addr = plugin_prep_mem_callbacks(addr);
1308
- gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
1309
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
1310
-
1311
- if (swap) {
1312
- tcg_temp_free_i64(swap);
1313
- }
1314
-}
1315
-
1316
-/*
1317
- * Return true if @mop, without knowledge of the pointer alignment,
1318
- * does not require 16-byte atomicity, and it would be advantageous
1319
- * to avoid a call to a helper function.
1320
- */
1321
-static bool use_two_i64_for_i128(MemOp mop)
1322
-{
1323
-#ifdef CONFIG_SOFTMMU
1324
- /* Two softmmu tlb lookups is larger than one function call. */
1325
- return false;
1326
-#else
1327
- /*
1328
- * For user-only, two 64-bit operations may well be smaller than a call.
1329
- * Determine if that would be legal for the requested atomicity.
1330
- */
1331
- switch (mop & MO_ATOM_MASK) {
1332
- case MO_ATOM_NONE:
1333
- case MO_ATOM_IFALIGN_PAIR:
1334
- return true;
1335
- case MO_ATOM_IFALIGN:
1336
- case MO_ATOM_SUBALIGN:
1337
- case MO_ATOM_WITHIN16:
1338
- case MO_ATOM_WITHIN16_PAIR:
1339
- /* In a serialized context, no atomicity is required. */
1340
- return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
1341
- default:
1342
- g_assert_not_reached();
1343
- }
1344
-#endif
1345
-}
1346
-
1347
-static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
1348
-{
1349
- MemOp mop_1 = orig, mop_2;
1350
-
1351
- tcg_debug_assert((orig & MO_SIZE) == MO_128);
1352
- tcg_debug_assert((orig & MO_SIGN) == 0);
1353
-
1354
- /* Reduce the size to 64-bit. */
1355
- mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
1356
-
1357
- /* Retain the alignment constraints of the original. */
1358
- switch (orig & MO_AMASK) {
1359
- case MO_UNALN:
1360
- case MO_ALIGN_2:
1361
- case MO_ALIGN_4:
1362
- mop_2 = mop_1;
1363
- break;
1364
- case MO_ALIGN_8:
1365
- /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
1366
- mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
1367
- mop_2 = mop_1;
1368
- break;
1369
- case MO_ALIGN:
1370
- /* Second has 8-byte alignment; first has 16-byte alignment. */
1371
- mop_2 = mop_1;
1372
- mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
1373
- break;
1374
- case MO_ALIGN_16:
1375
- case MO_ALIGN_32:
1376
- case MO_ALIGN_64:
1377
- /* Second has 8-byte alignment; first retains original. */
1378
- mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
1379
- break;
1380
- default:
1381
- g_assert_not_reached();
1382
- }
1383
-
1384
- /* Use a memory ordering implemented by the host. */
1385
- if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
1386
- mop_1 &= ~MO_BSWAP;
1387
- mop_2 &= ~MO_BSWAP;
1388
- }
1389
-
1390
- ret[0] = mop_1;
1391
- ret[1] = mop_2;
1392
-}
1393
-
1394
-#if TARGET_LONG_BITS == 64
1395
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
1396
-#else
1397
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
1398
-#endif
1399
-
1400
-void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
1401
-{
1402
- const MemOpIdx oi = make_memop_idx(memop, idx);
1403
-
1404
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1405
- tcg_debug_assert((memop & MO_SIGN) == 0);
1406
-
1407
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1408
- addr = plugin_prep_mem_callbacks(addr);
1409
-
1410
- /* TODO: For now, force 32-bit hosts to use the helper. */
1411
- if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
1412
- TCGv_i64 lo, hi;
1413
- TCGArg addr_arg;
1414
- MemOpIdx adj_oi;
1415
- bool need_bswap = false;
1416
-
1417
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1418
- lo = TCGV128_HIGH(val);
1419
- hi = TCGV128_LOW(val);
1420
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
1421
- need_bswap = true;
1422
- } else {
1423
- lo = TCGV128_LOW(val);
1424
- hi = TCGV128_HIGH(val);
1425
- adj_oi = oi;
1426
- }
1427
-
1428
-#if TARGET_LONG_BITS == 32
1429
- addr_arg = tcgv_i32_arg(addr);
1430
-#else
1431
- addr_arg = tcgv_i64_arg(addr);
1432
-#endif
1433
- tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
1434
-
1435
- if (need_bswap) {
1436
- tcg_gen_bswap64_i64(lo, lo);
1437
- tcg_gen_bswap64_i64(hi, hi);
1438
- }
1439
- } else if (use_two_i64_for_i128(memop)) {
1440
- MemOp mop[2];
1441
- TCGv addr_p8;
1442
- TCGv_i64 x, y;
1443
-
1444
- canonicalize_memop_i128_as_i64(mop, memop);
1445
-
1446
- /*
1447
- * Since there are no global TCGv_i128, there is no visible state
1448
- * changed if the second load faults. Load directly into the two
1449
- * subwords.
1450
- */
1451
- if ((memop & MO_BSWAP) == MO_LE) {
1452
- x = TCGV128_LOW(val);
1453
- y = TCGV128_HIGH(val);
1454
- } else {
1455
- x = TCGV128_HIGH(val);
1456
- y = TCGV128_LOW(val);
1457
- }
1458
-
1459
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
1460
-
1461
- if ((mop[0] ^ memop) & MO_BSWAP) {
1462
- tcg_gen_bswap64_i64(x, x);
1463
- }
1464
-
1465
- addr_p8 = tcg_temp_ebb_new();
1466
- tcg_gen_addi_tl(addr_p8, addr, 8);
1467
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
1468
- tcg_temp_free(addr_p8);
1469
-
1470
- if ((mop[0] ^ memop) & MO_BSWAP) {
1471
- tcg_gen_bswap64_i64(y, y);
1472
- }
1473
- } else {
1474
- gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
1475
- }
1476
-
1477
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
1478
-}
1479
-
1480
-void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
1481
-{
1482
- const MemOpIdx oi = make_memop_idx(memop, idx);
1483
-
1484
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1485
- tcg_debug_assert((memop & MO_SIGN) == 0);
1486
-
1487
- tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
1488
- addr = plugin_prep_mem_callbacks(addr);
1489
-
1490
- /* TODO: For now, force 32-bit hosts to use the helper. */
1491
-
1492
- if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
1493
- TCGv_i64 lo, hi;
1494
- TCGArg addr_arg;
1495
- MemOpIdx adj_oi;
1496
- bool need_bswap = false;
1497
-
1498
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1499
- lo = tcg_temp_new_i64();
1500
- hi = tcg_temp_new_i64();
1501
- tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
1502
- tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
1503
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
1504
- need_bswap = true;
1505
- } else {
1506
- lo = TCGV128_LOW(val);
1507
- hi = TCGV128_HIGH(val);
1508
- adj_oi = oi;
1509
- }
1510
-
1511
-#if TARGET_LONG_BITS == 32
1512
- addr_arg = tcgv_i32_arg(addr);
1513
-#else
1514
- addr_arg = tcgv_i64_arg(addr);
1515
-#endif
1516
- tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
1517
-
1518
- if (need_bswap) {
1519
- tcg_temp_free_i64(lo);
1520
- tcg_temp_free_i64(hi);
1521
- }
1522
- } else if (use_two_i64_for_i128(memop)) {
1523
- MemOp mop[2];
1524
- TCGv addr_p8;
1525
- TCGv_i64 x, y;
1526
-
1527
- canonicalize_memop_i128_as_i64(mop, memop);
1528
-
1529
- if ((memop & MO_BSWAP) == MO_LE) {
1530
- x = TCGV128_LOW(val);
1531
- y = TCGV128_HIGH(val);
1532
- } else {
1533
- x = TCGV128_HIGH(val);
1534
- y = TCGV128_LOW(val);
1535
- }
1536
-
1537
- addr_p8 = tcg_temp_ebb_new();
1538
- if ((mop[0] ^ memop) & MO_BSWAP) {
1539
- TCGv_i64 t = tcg_temp_ebb_new_i64();
1540
-
1541
- tcg_gen_bswap64_i64(t, x);
1542
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
1543
- tcg_gen_bswap64_i64(t, y);
1544
- tcg_gen_addi_tl(addr_p8, addr, 8);
1545
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
1546
- tcg_temp_free_i64(t);
1547
- } else {
1548
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
1549
- tcg_gen_addi_tl(addr_p8, addr, 8);
1550
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
1551
- }
1552
- tcg_temp_free(addr_p8);
1553
- } else {
1554
- gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
1555
- }
1556
-
1557
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
1558
-}
1559
-
1560
-static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
1561
-{
1562
- switch (opc & MO_SSIZE) {
1563
- case MO_SB:
1564
- tcg_gen_ext8s_i32(ret, val);
1565
- break;
1566
- case MO_UB:
1567
- tcg_gen_ext8u_i32(ret, val);
1568
- break;
1569
- case MO_SW:
1570
- tcg_gen_ext16s_i32(ret, val);
1571
- break;
1572
- case MO_UW:
1573
- tcg_gen_ext16u_i32(ret, val);
1574
- break;
1575
- default:
1576
- tcg_gen_mov_i32(ret, val);
1577
- break;
1578
- }
1579
-}
1580
-
1581
-static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
1582
-{
1583
- switch (opc & MO_SSIZE) {
1584
- case MO_SB:
1585
- tcg_gen_ext8s_i64(ret, val);
1586
- break;
1587
- case MO_UB:
1588
- tcg_gen_ext8u_i64(ret, val);
1589
- break;
1590
- case MO_SW:
1591
- tcg_gen_ext16s_i64(ret, val);
1592
- break;
1593
- case MO_UW:
1594
- tcg_gen_ext16u_i64(ret, val);
1595
- break;
1596
- case MO_SL:
1597
- tcg_gen_ext32s_i64(ret, val);
1598
- break;
1599
- case MO_UL:
1600
- tcg_gen_ext32u_i64(ret, val);
1601
- break;
1602
- default:
1603
- tcg_gen_mov_i64(ret, val);
1604
- break;
1605
- }
1606
-}
1607
-
1608
-typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
1609
- TCGv_i32, TCGv_i32, TCGv_i32);
1610
-typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
1611
- TCGv_i64, TCGv_i64, TCGv_i32);
1612
-typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
1613
- TCGv_i128, TCGv_i128, TCGv_i32);
1614
-typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
1615
- TCGv_i32, TCGv_i32);
1616
-typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
1617
- TCGv_i64, TCGv_i32);
1618
-
1619
-#ifdef CONFIG_ATOMIC64
1620
-# define WITH_ATOMIC64(X) X,
1621
-#else
1622
-# define WITH_ATOMIC64(X)
1623
-#endif
1624
-#ifdef CONFIG_CMPXCHG128
1625
-# define WITH_ATOMIC128(X) X,
1626
-#else
1627
-# define WITH_ATOMIC128(X)
1628
-#endif
1629
-
1630
-static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
1631
- [MO_8] = gen_helper_atomic_cmpxchgb,
1632
- [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
1633
- [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
1634
- [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
1635
- [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
1636
- WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
1637
- WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
1638
- WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
1639
- WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
1640
-};
1641
-
1642
-void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
1643
- TCGv_i32 newv, TCGArg idx, MemOp memop)
1644
-{
1645
- TCGv_i32 t1 = tcg_temp_ebb_new_i32();
1646
- TCGv_i32 t2 = tcg_temp_ebb_new_i32();
1647
-
1648
- tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
1649
-
1650
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
1651
- tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
1652
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
1653
- tcg_temp_free_i32(t2);
1654
-
1655
- if (memop & MO_SIGN) {
1656
- tcg_gen_ext_i32(retv, t1, memop);
1657
- } else {
1658
- tcg_gen_mov_i32(retv, t1);
1659
- }
1660
- tcg_temp_free_i32(t1);
1661
-}
1662
-
1663
-void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
1664
- TCGv_i32 newv, TCGArg idx, MemOp memop)
1665
-{
1666
- gen_atomic_cx_i32 gen;
1667
- MemOpIdx oi;
1668
-
1669
- if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1670
- tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
1671
- return;
1672
- }
1673
-
1674
- memop = tcg_canonicalize_memop(memop, 0, 0);
1675
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1676
- tcg_debug_assert(gen != NULL);
1677
-
1678
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
1679
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1680
-
1681
- if (memop & MO_SIGN) {
1682
- tcg_gen_ext_i32(retv, retv, memop);
1683
- }
1684
-}
1685
-
1686
-void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
1687
- TCGv_i64 newv, TCGArg idx, MemOp memop)
1688
-{
1689
- TCGv_i64 t1, t2;
1690
-
1691
- if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
1692
- tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
1693
- TCGV_LOW(newv), idx, memop);
1694
- if (memop & MO_SIGN) {
1695
- tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
1696
- } else {
1697
- tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
1698
- }
1699
- return;
1700
- }
1701
-
1702
- t1 = tcg_temp_ebb_new_i64();
1703
- t2 = tcg_temp_ebb_new_i64();
1704
-
1705
- tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
1706
-
1707
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
1708
- tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
1709
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
1710
- tcg_temp_free_i64(t2);
1711
-
1712
- if (memop & MO_SIGN) {
1713
- tcg_gen_ext_i64(retv, t1, memop);
1714
- } else {
1715
- tcg_gen_mov_i64(retv, t1);
1716
- }
1717
- tcg_temp_free_i64(t1);
1718
-}
1719
-
1720
-void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
1721
- TCGv_i64 newv, TCGArg idx, MemOp memop)
1722
-{
1723
- if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1724
- tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
1725
- return;
1726
- }
1727
-
1728
- if ((memop & MO_SIZE) == MO_64) {
1729
- gen_atomic_cx_i64 gen;
1730
-
1731
- memop = tcg_canonicalize_memop(memop, 1, 0);
1732
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1733
- if (gen) {
1734
- MemOpIdx oi = make_memop_idx(memop, idx);
1735
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1736
- return;
1737
- }
1738
-
1739
- gen_helper_exit_atomic(cpu_env);
1740
-
1741
- /*
1742
- * Produce a result for a well-formed opcode stream. This satisfies
1743
- * liveness for set before used, which happens before this dead code
1744
- * is removed.
1745
- */
1746
- tcg_gen_movi_i64(retv, 0);
1747
- return;
1748
- }
1749
-
1750
- if (TCG_TARGET_REG_BITS == 32) {
1751
- tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
1752
- TCGV_LOW(newv), idx, memop);
1753
- if (memop & MO_SIGN) {
1754
- tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
1755
- } else {
1756
- tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
1757
- }
1758
- } else {
1759
- TCGv_i32 c32 = tcg_temp_ebb_new_i32();
1760
- TCGv_i32 n32 = tcg_temp_ebb_new_i32();
1761
- TCGv_i32 r32 = tcg_temp_ebb_new_i32();
1762
-
1763
- tcg_gen_extrl_i64_i32(c32, cmpv);
1764
- tcg_gen_extrl_i64_i32(n32, newv);
1765
- tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
1766
- tcg_temp_free_i32(c32);
1767
- tcg_temp_free_i32(n32);
1768
-
1769
- tcg_gen_extu_i32_i64(retv, r32);
1770
- tcg_temp_free_i32(r32);
1771
-
1772
- if (memop & MO_SIGN) {
1773
- tcg_gen_ext_i64(retv, retv, memop);
1774
- }
1775
- }
1776
-}
1777
-
1778
-void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
1779
- TCGv_i128 newv, TCGArg idx, MemOp memop)
1780
-{
1781
- if (TCG_TARGET_REG_BITS == 32) {
1782
- /* Inline expansion below is simply too large for 32-bit hosts. */
1783
- gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
1784
- ? gen_helper_nonatomic_cmpxchgo_le
1785
- : gen_helper_nonatomic_cmpxchgo_be);
1786
- MemOpIdx oi = make_memop_idx(memop, idx);
1787
-
1788
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1789
- tcg_debug_assert((memop & MO_SIGN) == 0);
1790
-
1791
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1792
- } else {
1793
- TCGv_i128 oldv = tcg_temp_ebb_new_i128();
1794
- TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
1795
- TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1796
- TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1797
- TCGv_i64 z = tcg_constant_i64(0);
1798
-
1799
- tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
1800
-
1801
- /* Compare i128 */
1802
- tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
1803
- tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
1804
- tcg_gen_or_i64(t0, t0, t1);
1805
-
1806
- /* tmpv = equal ? newv : oldv */
1807
- tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
1808
- TCGV128_LOW(newv), TCGV128_LOW(oldv));
1809
- tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
1810
- TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
1811
-
1812
- /* Unconditional writeback. */
1813
- tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
1814
- tcg_gen_mov_i128(retv, oldv);
1815
-
1816
- tcg_temp_free_i64(t0);
1817
- tcg_temp_free_i64(t1);
1818
- tcg_temp_free_i128(tmpv);
1819
- tcg_temp_free_i128(oldv);
1820
- }
1821
-}
1822
-
1823
-void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
1824
- TCGv_i128 newv, TCGArg idx, MemOp memop)
1825
-{
1826
- gen_atomic_cx_i128 gen;
1827
-
1828
- if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1829
- tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
1830
- return;
1831
- }
1832
-
1833
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1834
- tcg_debug_assert((memop & MO_SIGN) == 0);
1835
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1836
-
1837
- if (gen) {
1838
- MemOpIdx oi = make_memop_idx(memop, idx);
1839
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1840
- return;
1841
- }
1842
-
1843
- gen_helper_exit_atomic(cpu_env);
1844
-
1845
- /*
1846
- * Produce a result for a well-formed opcode stream. This satisfies
1847
- * liveness for set before used, which happens before this dead code
1848
- * is removed.
1849
- */
1850
- tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
1851
- tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
1852
-}
1853
-
1854
-static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
1855
- TCGArg idx, MemOp memop, bool new_val,
1856
- void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
1857
-{
1858
- TCGv_i32 t1 = tcg_temp_ebb_new_i32();
1859
- TCGv_i32 t2 = tcg_temp_ebb_new_i32();
1860
-
1861
- memop = tcg_canonicalize_memop(memop, 0, 0);
1862
-
1863
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
1864
- tcg_gen_ext_i32(t2, val, memop);
1865
- gen(t2, t1, t2);
1866
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
1867
-
1868
- tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
1869
- tcg_temp_free_i32(t1);
1870
- tcg_temp_free_i32(t2);
1871
-}
1872
-
1873
-static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
1874
- TCGArg idx, MemOp memop, void * const table[])
1875
-{
1876
- gen_atomic_op_i32 gen;
1877
- MemOpIdx oi;
1878
-
1879
- memop = tcg_canonicalize_memop(memop, 0, 0);
1880
-
1881
- gen = table[memop & (MO_SIZE | MO_BSWAP)];
1882
- tcg_debug_assert(gen != NULL);
1883
-
1884
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
1885
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
1886
-
1887
- if (memop & MO_SIGN) {
1888
- tcg_gen_ext_i32(ret, ret, memop);
1889
- }
1890
-}
1891
-
1892
-static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
1893
- TCGArg idx, MemOp memop, bool new_val,
1894
- void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
1895
-{
1896
- TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1897
- TCGv_i64 t2 = tcg_temp_ebb_new_i64();
1898
-
1899
- memop = tcg_canonicalize_memop(memop, 1, 0);
1900
-
1901
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
1902
- tcg_gen_ext_i64(t2, val, memop);
1903
- gen(t2, t1, t2);
1904
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
1905
-
1906
- tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
1907
- tcg_temp_free_i64(t1);
1908
- tcg_temp_free_i64(t2);
1909
-}
1910
-
1911
-static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
1912
- TCGArg idx, MemOp memop, void * const table[])
1913
-{
1914
- memop = tcg_canonicalize_memop(memop, 1, 0);
1915
-
1916
- if ((memop & MO_SIZE) == MO_64) {
1917
-#ifdef CONFIG_ATOMIC64
1918
- gen_atomic_op_i64 gen;
1919
- MemOpIdx oi;
1920
-
1921
- gen = table[memop & (MO_SIZE | MO_BSWAP)];
1922
- tcg_debug_assert(gen != NULL);
1923
-
1924
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
1925
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
1926
-#else
1927
- gen_helper_exit_atomic(cpu_env);
1928
- /* Produce a result, so that we have a well-formed opcode stream
1929
- with respect to uses of the result in the (dead) code following. */
1930
- tcg_gen_movi_i64(ret, 0);
1931
-#endif /* CONFIG_ATOMIC64 */
1932
- } else {
1933
- TCGv_i32 v32 = tcg_temp_ebb_new_i32();
1934
- TCGv_i32 r32 = tcg_temp_ebb_new_i32();
1935
-
1936
- tcg_gen_extrl_i64_i32(v32, val);
1937
- do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
1938
- tcg_temp_free_i32(v32);
1939
-
1940
- tcg_gen_extu_i32_i64(ret, r32);
1941
- tcg_temp_free_i32(r32);
1942
-
1943
- if (memop & MO_SIGN) {
1944
- tcg_gen_ext_i64(ret, ret, memop);
1945
- }
1946
- }
1947
-}
1948
-
1949
-#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
1950
-static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
1951
- [MO_8] = gen_helper_atomic_##NAME##b, \
1952
- [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
1953
- [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
1954
- [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \
1955
- [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \
1956
- WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
1957
- WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
1958
-}; \
1959
-void tcg_gen_atomic_##NAME##_i32 \
1960
- (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
1961
-{ \
1962
- if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
1963
- do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
1964
- } else { \
1965
- do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \
1966
- tcg_gen_##OP##_i32); \
1967
- } \
1968
-} \
1969
-void tcg_gen_atomic_##NAME##_i64 \
1970
- (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
1971
-{ \
1972
- if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
1973
- do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
1974
- } else { \
1975
- do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \
1976
- tcg_gen_##OP##_i64); \
1977
- } \
1978
-}
1979
-
1980
-GEN_ATOMIC_HELPER(fetch_add, add, 0)
1981
-GEN_ATOMIC_HELPER(fetch_and, and, 0)
1982
-GEN_ATOMIC_HELPER(fetch_or, or, 0)
1983
-GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
1984
-GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
1985
-GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
1986
-GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
1987
-GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
1988
-
1989
-GEN_ATOMIC_HELPER(add_fetch, add, 1)
1990
-GEN_ATOMIC_HELPER(and_fetch, and, 1)
1991
-GEN_ATOMIC_HELPER(or_fetch, or, 1)
1992
-GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
1993
-GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
1994
-GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
1995
-GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
1996
-GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
1997
-
1998
-static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
1999
-{
2000
- tcg_gen_mov_i32(r, b);
2001
-}
2002
-
2003
-static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
2004
-{
2005
- tcg_gen_mov_i64(r, b);
2006
-}
2007
-
2008
-GEN_ATOMIC_HELPER(xchg, mov2, 0)
2009
-
2010
-#undef GEN_ATOMIC_HELPER
2011
diff --git a/tcg/meson.build b/tcg/meson.build
2012
index XXXXXXX..XXXXXXX 100644
2013
--- a/tcg/meson.build
2014
+++ b/tcg/meson.build
2015
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(files(
2016
'tcg.c',
2017
'tcg-common.c',
2018
'tcg-op.c',
2019
+ 'tcg-op-ldst.c',
2020
'tcg-op-gvec.c',
2021
'tcg-op-vec.c',
2022
))
2023
--
2024
2.34.1
2025
2026
New patch
1
1
We already pass uint64_t to restore_state_to_opc; this changes all
2
of the other uses from insn_start through the encoding to decoding.
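The functions this touches in accel/tcg/translate-all.c are the signed-LEB128 encoder and decoder shown in the hunk below. For reference, a minimal standalone sketch of that round trip at 64 bits; it mirrors the names used in the patch but pulls in no QEMU headers and, like the original, assumes an arithmetic right shift for signed values:

    #include <stdint.h>
    #include <stdio.h>

    /* Encode VAL as a signed LEB128 sequence at P; return P past the output. */
    static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
    {
        int more, byte;

        do {
            byte = val & 0x7f;
            val >>= 7;              /* arithmetic shift assumed for negatives */
            more = !((val == 0 && (byte & 0x40) == 0)
                     || (val == -1 && (byte & 0x40) != 0));
            if (more) {
                byte |= 0x80;       /* continuation bit */
            }
            *p++ = byte;
        } while (more);

        return p;
    }

    /* Decode a signed LEB128 sequence at *PP; advance *PP past it. */
    static int64_t decode_sleb128(const uint8_t **pp)
    {
        const uint8_t *p = *pp;
        int64_t val = 0;
        int byte, shift = 0;

        do {
            byte = *p++;
            val |= (int64_t)(byte & 0x7f) << shift;
            shift += 7;
        } while (byte & 0x80);
        if (shift < 64 && (byte & 0x40)) {
            val |= -(int64_t)1 << shift;   /* sign-extend the last chunk */
        }

        *pp = p;
        return val;
    }

    int main(void)
    {
        uint8_t buf[10];                    /* a 64-bit value needs at most 10 bytes */
        const uint8_t *rp = buf;
        int64_t in = -0x123456789abcdLL;    /* wider than 32 bits on purpose */

        encode_sleb128(buf, in);
        printf("round trip ok: %d\n", decode_sleb128(&rp) == in);
        return 0;
    }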
3
4
Reviewed-by: Anton Johansson <anjo@rev.ng>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
include/tcg/tcg-op.h | 39 +++++++++------------------------------
9
include/tcg/tcg-opc.h | 2 +-
10
include/tcg/tcg.h | 30 +++++++++++++++---------------
11
accel/tcg/translate-all.c | 28 ++++++++++++++++------------
12
tcg/tcg.c | 18 ++++--------------
13
5 files changed, 45 insertions(+), 72 deletions(-)
14
15
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/tcg/tcg-op.h
18
+++ b/include/tcg/tcg-op.h
19
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
20
#endif
21
22
#if TARGET_INSN_START_WORDS == 1
23
-# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
24
static inline void tcg_gen_insn_start(target_ulong pc)
25
{
26
- tcg_gen_op1(INDEX_op_insn_start, pc);
27
+ TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 64 / TCG_TARGET_REG_BITS);
28
+ tcg_set_insn_start_param(op, 0, pc);
29
}
30
-# else
31
-static inline void tcg_gen_insn_start(target_ulong pc)
32
-{
33
- tcg_gen_op2(INDEX_op_insn_start, (uint32_t)pc, (uint32_t)(pc >> 32));
34
-}
35
-# endif
36
#elif TARGET_INSN_START_WORDS == 2
37
-# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
38
static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
39
{
40
- tcg_gen_op2(INDEX_op_insn_start, pc, a1);
41
+ TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 2 * 64 / TCG_TARGET_REG_BITS);
42
+ tcg_set_insn_start_param(op, 0, pc);
43
+ tcg_set_insn_start_param(op, 1, a1);
44
}
45
-# else
46
-static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
47
-{
48
- tcg_gen_op4(INDEX_op_insn_start,
49
- (uint32_t)pc, (uint32_t)(pc >> 32),
50
- (uint32_t)a1, (uint32_t)(a1 >> 32));
51
-}
52
-# endif
53
#elif TARGET_INSN_START_WORDS == 3
54
-# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
55
static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
56
target_ulong a2)
57
{
58
- tcg_gen_op3(INDEX_op_insn_start, pc, a1, a2);
59
+ TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 3 * 64 / TCG_TARGET_REG_BITS);
60
+ tcg_set_insn_start_param(op, 0, pc);
61
+ tcg_set_insn_start_param(op, 1, a1);
62
+ tcg_set_insn_start_param(op, 2, a2);
63
}
64
-# else
65
-static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
66
- target_ulong a2)
67
-{
68
- tcg_gen_op6(INDEX_op_insn_start,
69
- (uint32_t)pc, (uint32_t)(pc >> 32),
70
- (uint32_t)a1, (uint32_t)(a1 >> 32),
71
- (uint32_t)a2, (uint32_t)(a2 >> 32));
72
-}
73
-# endif
74
#else
75
# error "Unhandled number of operands to insn_start"
76
#endif
77
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
78
index XXXXXXX..XXXXXXX 100644
79
--- a/include/tcg/tcg-opc.h
80
+++ b/include/tcg/tcg-opc.h
81
@@ -XXX,XX +XXX,XX @@ DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
82
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
83
84
/* QEMU specific */
85
-DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
86
+DEF(insn_start, 0, 0, DATA64_ARGS * TARGET_INSN_START_WORDS,
87
TCG_OPF_NOT_PRESENT)
88
DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
89
DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
90
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
91
index XXXXXXX..XXXXXXX 100644
92
--- a/include/tcg/tcg.h
93
+++ b/include/tcg/tcg.h
94
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
95
TCGTemp *reg_to_temp[TCG_TARGET_NB_REGS];
96
97
uint16_t gen_insn_end_off[TCG_MAX_INSNS];
98
- target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
99
+ uint64_t gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
100
101
/* Exit to translator on overflow. */
102
sigjmp_buf jmp_trans;
103
@@ -XXX,XX +XXX,XX @@ static inline void tcg_set_insn_param(TCGOp *op, int arg, TCGArg v)
104
op->args[arg] = v;
105
}
106
107
-static inline target_ulong tcg_get_insn_start_param(TCGOp *op, int arg)
108
+static inline uint64_t tcg_get_insn_start_param(TCGOp *op, int arg)
109
{
110
-#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
111
- return tcg_get_insn_param(op, arg);
112
-#else
113
- return tcg_get_insn_param(op, arg * 2) |
114
- ((uint64_t)tcg_get_insn_param(op, arg * 2 + 1) << 32);
115
-#endif
116
+ if (TCG_TARGET_REG_BITS == 64) {
117
+ return tcg_get_insn_param(op, arg);
118
+ } else {
119
+ return deposit64(tcg_get_insn_param(op, arg * 2), 32, 32,
120
+ tcg_get_insn_param(op, arg * 2 + 1));
121
+ }
122
}
123
124
-static inline void tcg_set_insn_start_param(TCGOp *op, int arg, target_ulong v)
125
+static inline void tcg_set_insn_start_param(TCGOp *op, int arg, uint64_t v)
126
{
127
-#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
128
- tcg_set_insn_param(op, arg, v);
129
-#else
130
- tcg_set_insn_param(op, arg * 2, v);
131
- tcg_set_insn_param(op, arg * 2 + 1, v >> 32);
132
-#endif
133
+ if (TCG_TARGET_REG_BITS == 64) {
134
+ tcg_set_insn_param(op, arg, v);
135
+ } else {
136
+ tcg_set_insn_param(op, arg * 2, v);
137
+ tcg_set_insn_param(op, arg * 2 + 1, v >> 32);
138
+ }
139
}
140
141
/* The last op that was emitted. */
142
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/accel/tcg/translate-all.c
145
+++ b/accel/tcg/translate-all.c
146
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
147
148
TBContext tb_ctx;
149
150
-/* Encode VAL as a signed leb128 sequence at P.
151
- Return P incremented past the encoded value. */
152
-static uint8_t *encode_sleb128(uint8_t *p, target_long val)
153
+/*
154
+ * Encode VAL as a signed leb128 sequence at P.
155
+ * Return P incremented past the encoded value.
156
+ */
157
+static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
158
{
159
int more, byte;
160
161
@@ -XXX,XX +XXX,XX @@ static uint8_t *encode_sleb128(uint8_t *p, target_long val)
162
return p;
163
}
164
165
-/* Decode a signed leb128 sequence at *PP; increment *PP past the
166
- decoded value. Return the decoded value. */
167
-static target_long decode_sleb128(const uint8_t **pp)
168
+/*
169
+ * Decode a signed leb128 sequence at *PP; increment *PP past the
170
+ * decoded value. Return the decoded value.
171
+ */
172
+static int64_t decode_sleb128(const uint8_t **pp)
173
{
174
const uint8_t *p = *pp;
175
- target_long val = 0;
176
+ int64_t val = 0;
177
int byte, shift = 0;
178
179
do {
180
byte = *p++;
181
- val |= (target_ulong)(byte & 0x7f) << shift;
182
+ val |= (int64_t)(byte & 0x7f) << shift;
183
shift += 7;
184
} while (byte & 0x80);
185
if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
186
- val |= -(target_ulong)1 << shift;
187
+ val |= -(int64_t)1 << shift;
188
}
189
190
*pp = p;
191
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
192
int i, j, n;
193
194
for (i = 0, n = tb->icount; i < n; ++i) {
195
- target_ulong prev;
196
+ uint64_t prev;
197
198
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
199
if (i == 0) {
200
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
201
/* Dump header and the first instruction */
202
fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
203
fprintf(logfile,
204
- " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
205
+ " -- guest addr 0x%016" PRIx64 " + tb prologue\n",
206
tcg_ctx->gen_insn_data[insn][0]);
207
chunk_start = tcg_ctx->gen_insn_end_off[insn];
208
disas(logfile, tb->tc.ptr, chunk_start);
209
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
210
while (insn < tb->icount) {
211
size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
212
if (chunk_end > chunk_start) {
213
- fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n",
214
+ fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
215
tcg_ctx->gen_insn_data[insn][0]);
216
disas(logfile, tb->tc.ptr + chunk_start,
217
chunk_end - chunk_start);
218
diff --git a/tcg/tcg.c b/tcg/tcg.c
219
index XXXXXXX..XXXXXXX 100644
220
--- a/tcg/tcg.c
221
+++ b/tcg/tcg.c
222
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
223
col += ne_fprintf(f, "\n ----");
224
225
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
226
- target_ulong a;
227
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
228
- a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
229
-#else
230
- a = op->args[i];
231
-#endif
232
- col += ne_fprintf(f, " " TARGET_FMT_lx, a);
233
+ col += ne_fprintf(f, " %016" PRIx64,
234
+ tcg_get_insn_start_param(op, i));
235
}
236
} else if (c == INDEX_op_call) {
237
const TCGHelperInfo *info = tcg_call_info(op);
238
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
239
}
240
num_insns++;
241
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
242
- target_ulong a;
243
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
244
- a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
245
-#else
246
- a = op->args[i];
247
-#endif
248
- s->gen_insn_data[num_insns][i] = a;
249
+ s->gen_insn_data[num_insns][i] =
250
+ tcg_get_insn_start_param(op, i);
251
}
252
break;
253
case INDEX_op_discard:
254
--
255
2.34.1
256
257
New patch
1
Always pass the target address as uint64_t.
2
Adjust tcg_out_{ld,st}_helper_args to match.
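The interesting case in the tcg_out_{ld,st}_helper_args hunks below is a 32-bit host with a 32-bit guest: the uint64_t address argument spans two 32-bit call slots, the guest address supplies the low half and an explicit zero the high half, and HOST_BIG_ENDIAN decides which slot each half lands in. A small standalone illustration of that slot layout (hypothetical names, not QEMU code):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /*
     * On a 32-bit host a uint64_t argument occupies two 32-bit slots; the
     * slot holding the low half depends on host endianness, which is why
     * the patch indexes the argument locations with HOST_BIG_ENDIAN.
     */
    static void fill_addr_slots(uint32_t slot[2], uint32_t guest_addr,
                                int host_big_endian)
    {
        slot[host_big_endian] = guest_addr;   /* low half: the guest address */
        slot[!host_big_endian] = 0;           /* high half: explicit zero */
    }

    int main(void)
    {
        uint32_t le[2], be[2];

        fill_addr_slots(le, 0xdeadbeefu, 0);  /* little-endian host */
        fill_addr_slots(be, 0xdeadbeefu, 1);  /* big-endian host */
        printf("LE slots: %08" PRIx32 " %08" PRIx32 "\n", le[0], le[1]);
        printf("BE slots: %08" PRIx32 " %08" PRIx32 "\n", be[0], be[1]);
        return 0;
    }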
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/tcg/tcg-ldst.h | 26 +++++++++---------
8
accel/tcg/cputlb.c | 26 +++++++++---------
9
accel/tcg/user-exec.c | 26 +++++++++---------
10
tcg/tcg.c | 62 ++++++++++++++++++++++++++++++++----------
11
4 files changed, 87 insertions(+), 53 deletions(-)
12
13
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-ldst.h
16
+++ b/include/tcg/tcg-ldst.h
17
@@ -XXX,XX +XXX,XX @@
18
#define TCG_LDST_H
19
20
/* Value zero-extended to tcg register size. */
21
-tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
22
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
23
MemOpIdx oi, uintptr_t retaddr);
24
-tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
25
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
26
MemOpIdx oi, uintptr_t retaddr);
27
-tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
28
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
29
MemOpIdx oi, uintptr_t retaddr);
30
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
31
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
32
MemOpIdx oi, uintptr_t retaddr);
33
-Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
34
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
35
MemOpIdx oi, uintptr_t retaddr);
36
37
/* Value sign-extended to tcg register size. */
38
-tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
39
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
40
MemOpIdx oi, uintptr_t retaddr);
41
-tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
42
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
43
MemOpIdx oi, uintptr_t retaddr);
44
-tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
45
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
46
MemOpIdx oi, uintptr_t retaddr);
47
48
/*
49
* Value extended to at least uint32_t, so that some ABIs do not require
50
* zero-extension from uint8_t or uint16_t.
51
*/
52
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
53
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
54
MemOpIdx oi, uintptr_t retaddr);
55
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
56
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
57
MemOpIdx oi, uintptr_t retaddr);
58
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
59
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
60
MemOpIdx oi, uintptr_t retaddr);
61
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
62
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
63
MemOpIdx oi, uintptr_t retaddr);
64
-void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
65
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
66
MemOpIdx oi, uintptr_t retaddr);
67
68
#endif /* TCG_LDST_H */
69
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/accel/tcg/cputlb.c
72
+++ b/accel/tcg/cputlb.c
73
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
74
return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
75
}
76
77
-tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
78
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
79
MemOpIdx oi, uintptr_t retaddr)
80
{
81
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
82
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
83
return ret;
84
}
85
86
-tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
87
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
88
MemOpIdx oi, uintptr_t retaddr)
89
{
90
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
91
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
92
return ret;
93
}
94
95
-tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
96
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
97
MemOpIdx oi, uintptr_t retaddr)
98
{
99
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
100
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
101
return ret;
102
}
103
104
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
105
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
106
MemOpIdx oi, uintptr_t retaddr)
107
{
108
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
109
@@ -XXX,XX +XXX,XX @@ uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
110
* avoid this for 64-bit data, or for 32-bit data on 32-bit host.
111
*/
112
113
-tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
114
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
115
MemOpIdx oi, uintptr_t retaddr)
116
{
117
return (int8_t)helper_ldub_mmu(env, addr, oi, retaddr);
118
}
119
120
-tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
121
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
122
MemOpIdx oi, uintptr_t retaddr)
123
{
124
return (int16_t)helper_lduw_mmu(env, addr, oi, retaddr);
125
}
126
127
-tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
128
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
129
MemOpIdx oi, uintptr_t retaddr)
130
{
131
return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
132
@@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr,
133
return ret;
134
}
135
136
-Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
137
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
138
uint32_t oi, uintptr_t retaddr)
139
{
140
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
141
@@ -XXX,XX +XXX,XX @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
142
}
143
}
144
145
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
146
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
147
MemOpIdx oi, uintptr_t ra)
148
{
149
MMULookupLocals l;
150
@@ -XXX,XX +XXX,XX @@ static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
151
do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
152
}
153
154
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
155
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
156
MemOpIdx oi, uintptr_t retaddr)
157
{
158
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
159
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
160
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
161
}
162
163
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
164
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
165
MemOpIdx oi, uintptr_t retaddr)
166
{
167
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
168
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
169
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
170
}
171
172
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
173
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
174
MemOpIdx oi, uintptr_t retaddr)
175
{
176
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
177
@@ -XXX,XX +XXX,XX @@ static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
178
}
179
}
180
181
-void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
182
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
183
MemOpIdx oi, uintptr_t retaddr)
184
{
185
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
186
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
187
index XXXXXXX..XXXXXXX 100644
188
--- a/accel/tcg/user-exec.c
189
+++ b/accel/tcg/user-exec.c
190
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
191
return ret;
192
}
193
194
-tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
195
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
196
MemOpIdx oi, uintptr_t ra)
197
{
198
return do_ld1_mmu(env, addr, get_memop(oi), ra);
199
}
200
201
-tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
202
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
203
MemOpIdx oi, uintptr_t ra)
204
{
205
return (int8_t)do_ld1_mmu(env, addr, get_memop(oi), ra);
206
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
207
return ret;
208
}
209
210
-tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
211
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
212
MemOpIdx oi, uintptr_t ra)
213
{
214
MemOp mop = get_memop(oi);
215
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
216
return ret;
217
}
218
219
-tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
220
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
221
MemOpIdx oi, uintptr_t ra)
222
{
223
MemOp mop = get_memop(oi);
224
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
225
return ret;
226
}
227
228
-tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
229
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
230
MemOpIdx oi, uintptr_t ra)
231
{
232
MemOp mop = get_memop(oi);
233
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
234
return ret;
235
}
236
237
-tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
238
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
239
MemOpIdx oi, uintptr_t ra)
240
{
241
MemOp mop = get_memop(oi);
242
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
243
return ret;
244
}
245
246
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
247
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
248
MemOpIdx oi, uintptr_t ra)
249
{
250
MemOp mop = get_memop(oi);
251
@@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
252
return ret;
253
}
254
255
-Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
256
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
257
MemOpIdx oi, uintptr_t ra)
258
{
259
MemOp mop = get_memop(oi);
260
@@ -XXX,XX +XXX,XX @@ static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
261
clear_helper_retaddr();
262
}
263
264
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
265
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
266
MemOpIdx oi, uintptr_t ra)
267
{
268
do_st1_mmu(env, addr, val, get_memop(oi), ra);
269
@@ -XXX,XX +XXX,XX @@ static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
270
clear_helper_retaddr();
271
}
272
273
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
274
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
275
MemOpIdx oi, uintptr_t ra)
276
{
277
MemOp mop = get_memop(oi);
278
@@ -XXX,XX +XXX,XX @@ static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
279
clear_helper_retaddr();
280
}
281
282
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
283
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
284
MemOpIdx oi, uintptr_t ra)
285
{
286
MemOp mop = get_memop(oi);
287
@@ -XXX,XX +XXX,XX @@ static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
288
clear_helper_retaddr();
289
}
290
291
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
292
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
293
MemOpIdx oi, uintptr_t ra)
294
{
295
MemOp mop = get_memop(oi);
296
@@ -XXX,XX +XXX,XX @@ static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
297
clear_helper_retaddr();
298
}
299
300
-void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
301
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
302
MemOpIdx oi, uintptr_t ra)
303
{
304
MemOp mop = get_memop(oi);
305
diff --git a/tcg/tcg.c b/tcg/tcg.c
306
index XXXXXXX..XXXXXXX 100644
307
--- a/tcg/tcg.c
308
+++ b/tcg/tcg.c
309
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld32_mmu = {
310
.flags = TCG_CALL_NO_WG,
311
.typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
312
| dh_typemask(env, 1)
313
- | dh_typemask(tl, 2) /* target_ulong addr */
314
+ | dh_typemask(i64, 2) /* uint64_t addr */
315
| dh_typemask(i32, 3) /* unsigned oi */
316
| dh_typemask(ptr, 4) /* uintptr_t ra */
317
};
318
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld64_mmu = {
319
.flags = TCG_CALL_NO_WG,
320
.typemask = dh_typemask(i64, 0) /* return uint64_t */
321
| dh_typemask(env, 1)
322
- | dh_typemask(tl, 2) /* target_ulong addr */
323
+ | dh_typemask(i64, 2) /* uint64_t addr */
324
| dh_typemask(i32, 3) /* unsigned oi */
325
| dh_typemask(ptr, 4) /* uintptr_t ra */
326
};
327
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld128_mmu = {
328
.flags = TCG_CALL_NO_WG,
329
.typemask = dh_typemask(i128, 0) /* return Int128 */
330
| dh_typemask(env, 1)
331
- | dh_typemask(tl, 2) /* target_ulong addr */
332
+ | dh_typemask(i64, 2) /* uint64_t addr */
333
| dh_typemask(i32, 3) /* unsigned oi */
334
| dh_typemask(ptr, 4) /* uintptr_t ra */
335
};
336
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st32_mmu = {
337
.flags = TCG_CALL_NO_WG,
338
.typemask = dh_typemask(void, 0)
339
| dh_typemask(env, 1)
340
- | dh_typemask(tl, 2) /* target_ulong addr */
341
+ | dh_typemask(i64, 2) /* uint64_t addr */
342
| dh_typemask(i32, 3) /* uint32_t data */
343
| dh_typemask(i32, 4) /* unsigned oi */
344
| dh_typemask(ptr, 5) /* uintptr_t ra */
345
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st64_mmu = {
346
.flags = TCG_CALL_NO_WG,
347
.typemask = dh_typemask(void, 0)
348
| dh_typemask(env, 1)
349
- | dh_typemask(tl, 2) /* target_ulong addr */
350
+ | dh_typemask(i64, 2) /* uint64_t addr */
351
| dh_typemask(i64, 3) /* uint64_t data */
352
| dh_typemask(i32, 4) /* unsigned oi */
353
| dh_typemask(ptr, 5) /* uintptr_t ra */
354
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st128_mmu = {
355
.flags = TCG_CALL_NO_WG,
356
.typemask = dh_typemask(void, 0)
357
| dh_typemask(env, 1)
358
- | dh_typemask(tl, 2) /* target_ulong addr */
359
+ | dh_typemask(i64, 2) /* uint64_t addr */
360
| dh_typemask(i128, 3) /* Int128 data */
361
| dh_typemask(i32, 4) /* unsigned oi */
362
| dh_typemask(ptr, 5) /* uintptr_t ra */
363
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
364
next_arg = 1;
365
366
loc = &info->in[next_arg];
367
- nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
368
- ldst->addrlo_reg, ldst->addrhi_reg);
369
- next_arg += nmov;
370
+ if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
371
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
372
+ ldst->addrlo_reg, ldst->addrhi_reg);
373
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
374
+ next_arg += nmov;
375
+ } else {
376
+ /*
377
+ * 32-bit host with 32-bit guest: zero-extend the guest address
378
+ * to 64-bits for the helper by storing the low part, then
379
+ * load a zero for the high part.
380
+ */
381
+ tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
382
+ TCG_TYPE_I32, TCG_TYPE_I32,
383
+ ldst->addrlo_reg, -1);
384
+ tcg_out_helper_load_slots(s, 1, mov, parm);
385
386
- tcg_out_helper_load_slots(s, nmov, mov, parm);
387
+ tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
388
+ TCG_TYPE_I32, 0, parm);
389
+ next_arg += 2;
390
+ }
391
392
switch (info->out_kind) {
393
case TCG_CALL_RET_NORMAL:
394
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
395
396
/* Handle addr argument. */
397
loc = &info->in[next_arg];
398
- n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
399
- ldst->addrlo_reg, ldst->addrhi_reg);
400
- next_arg += n;
401
- nmov += n;
402
+ if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
403
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
404
+ ldst->addrlo_reg, ldst->addrhi_reg);
405
+ next_arg += n;
406
+ nmov += n;
407
+ } else {
408
+ /*
409
+ * 32-bit host with 32-bit guest: zero-extend the guest address
410
+ * to 64-bits for the helper by storing the low part. Later,
411
+ * after we have processed the register inputs, we will load a
412
+ * zero for the high part.
413
+ */
414
+ tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
415
+ TCG_TYPE_I32, TCG_TYPE_I32,
416
+ ldst->addrlo_reg, -1);
417
+ next_arg += 2;
418
+ nmov += 1;
419
+ }
420
421
/* Handle data argument. */
422
loc = &info->in[next_arg];
423
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
424
g_assert_not_reached();
425
}
426
427
+ if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32) {
428
+ loc = &info->in[1 + !HOST_BIG_ENDIAN];
429
+ tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
430
+ }
431
+
432
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
433
}
434
435
--
436
2.34.1
437
438
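As a side note on the hunk above: on a 32-bit host the helper's uint64_t
address argument spans two 32-bit call slots, and which slot carries the
significant half depends on host endianness. The following stand-alone C
sketch (not QEMU code; fill_addr_slots and the fixed HOST_BIG_ENDIAN value
are invented for illustration) shows the layout the comments describe: the
guest address supplies the low half and an explicit zero fills the high half.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define HOST_BIG_ENDIAN 0                 /* assumed little-endian host */

/* Pack a 32-bit guest address into the two 32-bit argument slots that
 * together form the helper's uint64_t addr parameter. */
static void fill_addr_slots(uint32_t guest_addr, uint32_t slot[2])
{
    slot[HOST_BIG_ENDIAN]  = guest_addr;  /* low half of the 64-bit value */
    slot[!HOST_BIG_ENDIAN] = 0;           /* explicit zero for the high half */
}

int main(void)
{
    uint32_t slot[2];
    fill_addr_slots(0xdeadbeef, slot);

    /* Reassemble what the helper sees as its uint64_t argument. */
    uint64_t addr = ((uint64_t)slot[!HOST_BIG_ENDIAN] << 32) | slot[HOST_BIG_ENDIAN];
    printf("helper receives addr = 0x%016" PRIx64 "\n", addr);
    return 0;
}

Stores use the same layout; the only difference in the hunk is that the zero
for the high half is loaded after the register inputs have been processed.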
1
The base qemu_ram_addr_from_host function is already in
1
Always pass the target address as uint64_t.
2
softmmu/physmem.c; move the nofail version to be adjacent.
3
2
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
6
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
include/exec/cpu-common.h | 1 +
6
accel/tcg/tcg-runtime.h | 4 ++--
10
accel/tcg/cputlb.c | 12 ------------
7
accel/tcg/cputlb.c | 5 ++---
11
softmmu/physmem.c | 12 ++++++++++++
8
accel/tcg/user-exec.c | 5 ++---
12
3 files changed, 13 insertions(+), 12 deletions(-)
9
tcg/tcg-op-ldst.c | 26 ++++++++++++++++++++++++--
10
4 files changed, 30 insertions(+), 10 deletions(-)
13
11
14
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
12
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
15
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
16
--- a/include/exec/cpu-common.h
14
--- a/accel/tcg/tcg-runtime.h
17
+++ b/include/exec/cpu-common.h
15
+++ b/accel/tcg/tcg-runtime.h
18
@@ -XXX,XX +XXX,XX @@ typedef uintptr_t ram_addr_t;
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
19
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
17
DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
20
/* This should not be used by devices. */
18
#endif /* IN_HELPER_PROTO */
21
ram_addr_t qemu_ram_addr_from_host(void *ptr);
19
22
+ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
20
-DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, tl, i32)
23
RAMBlock *qemu_ram_block_by_name(const char *name);
21
-DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, tl, i128, i32)
24
RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
22
+DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, i64, i32)
25
ram_addr_t *offset);
23
+DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32)
24
25
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
26
i32, env, tl, i32, i32, i32)
26
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
27
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
27
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
28
--- a/accel/tcg/cputlb.c
29
--- a/accel/tcg/cputlb.c
29
+++ b/accel/tcg/cputlb.c
30
+++ b/accel/tcg/cputlb.c
30
@@ -XXX,XX +XXX,XX @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
31
@@ -XXX,XX +XXX,XX @@ Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
31
prot, mmu_idx, size);
32
return do_ld16_mmu(env, addr, oi, retaddr);
32
}
33
}
33
34
34
-static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
35
-Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, uint32_t oi)
35
-{
36
+Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, uint32_t oi)
36
- ram_addr_t ram_addr;
37
{
37
-
38
return helper_ld16_mmu(env, addr, oi, GETPC());
38
- ram_addr = qemu_ram_addr_from_host(ptr);
39
}
39
- if (ram_addr == RAM_ADDR_INVALID) {
40
@@ -XXX,XX +XXX,XX @@ void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
40
- error_report("Bad ram pointer %p", ptr);
41
do_st16_mmu(env, addr, val, oi, retaddr);
41
- abort();
42
}
42
- }
43
43
- return ram_addr;
44
-void helper_st_i128(CPUArchState *env, target_ulong addr, Int128 val,
44
-}
45
- MemOpIdx oi)
45
-
46
+void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
46
/*
47
{
47
* Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
48
helper_st16_mmu(env, addr, val, oi, GETPC());
48
* caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
49
}
49
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
50
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
50
index XXXXXXX..XXXXXXX 100644
51
index XXXXXXX..XXXXXXX 100644
51
--- a/softmmu/physmem.c
52
--- a/accel/tcg/user-exec.c
52
+++ b/softmmu/physmem.c
53
+++ b/accel/tcg/user-exec.c
53
@@ -XXX,XX +XXX,XX @@ ram_addr_t qemu_ram_addr_from_host(void *ptr)
54
@@ -XXX,XX +XXX,XX @@ Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
54
return block->offset + offset;
55
return ret;
55
}
56
}
56
57
57
+ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
58
-Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, MemOpIdx oi)
59
+Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, MemOpIdx oi)
60
{
61
return helper_ld16_mmu(env, addr, oi, GETPC());
62
}
63
@@ -XXX,XX +XXX,XX @@ void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
64
do_st16_he_mmu(env, addr, val, mop, ra);
65
}
66
67
-void helper_st_i128(CPUArchState *env, target_ulong addr,
68
- Int128 val, MemOpIdx oi)
69
+void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
70
{
71
helper_st16_mmu(env, addr, val, oi, GETPC());
72
}
73
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/tcg/tcg-op-ldst.c
76
+++ b/tcg/tcg-op-ldst.c
77
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
78
#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
79
#endif
80
81
+static TCGv_i64 maybe_extend_addr64(TCGv addr)
58
+{
82
+{
59
+ ram_addr_t ram_addr;
83
+#if TARGET_LONG_BITS == 32
60
+
84
+ TCGv_i64 a64 = tcg_temp_ebb_new_i64();
61
+ ram_addr = qemu_ram_addr_from_host(ptr);
85
+ tcg_gen_extu_i32_i64(a64, addr);
62
+ if (ram_addr == RAM_ADDR_INVALID) {
86
+ return a64;
63
+ error_report("Bad ram pointer %p", ptr);
87
+#else
64
+ abort();
88
+ return addr;
65
+ }
89
+#endif
66
+ return ram_addr;
67
+}
90
+}
68
+
91
+
69
static MemTxResult flatview_read(FlatView *fv, hwaddr addr,
92
+static void maybe_free_addr64(TCGv_i64 a64)
70
MemTxAttrs attrs, void *buf, hwaddr len);
93
+{
71
static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs,
94
+#if TARGET_LONG_BITS == 32
95
+ tcg_temp_free_i64(a64);
96
+#endif
97
+}
98
+
99
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
100
{
101
const MemOpIdx oi = make_memop_idx(memop, idx);
102
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
103
tcg_gen_bswap64_i64(y, y);
104
}
105
} else {
106
- gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
107
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
108
+ gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
109
+ maybe_free_addr64(a64);
110
}
111
112
plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
113
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
114
}
115
tcg_temp_free(addr_p8);
116
} else {
117
- gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
118
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
119
+ gen_helper_st_i128(cpu_env, a64, val, tcg_constant_i32(oi));
120
+ maybe_free_addr64(a64);
121
}
122
123
plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
72
--
124
--
73
2.34.1
125
2.34.1
126
127
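The patch above settles on a single helper signature in which the guest
address is always a uint64_t, with the caller widening a 32-bit guest
address before the call. A minimal plain-C analogue, assuming a 32-bit
guest (guest_addr_t, helper_ld_example and do_load are invented names,
not QEMU interfaces):

#include <stdint.h>

#define EXAMPLE_TARGET_LONG_BITS 32             /* assumption for the sketch */

#if EXAMPLE_TARGET_LONG_BITS == 32
typedef uint32_t guest_addr_t;
#else
typedef uint64_t guest_addr_t;
#endif

/* One signature for every guest: the address is always uint64_t. */
static uint64_t helper_ld_example(uint64_t addr)
{
    return addr;                                /* stand-in for the real load */
}

uint64_t do_load(guest_addr_t addr)
{
    /* A 32-bit guest address is zero-extended here; a 64-bit guest address
     * passes through unchanged.  The helper itself never needs to know. */
    return helper_ld_example((uint64_t)addr);
}

In the generated code the same widening is what maybe_extend_addr64 does,
and maybe_free_addr64 releases the temporary only when one was actually
created.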
1
While there are no target-specific nonfaulting probes,
1
Always pass the target address as uint64_t.
2
generic code may grow some uses at some point.
2
3
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Note that the attrs argument was incorrect -- it should have
5
been MEMTXATTRS_UNSPECIFIED. Just use the simpler interface.
6
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
5
---
10
target/avr/helper.c | 46 ++++++++++++++++++++++++++++-----------------
6
accel/tcg/tcg-runtime.h | 46 +++++++++++++++++------------------
11
1 file changed, 29 insertions(+), 17 deletions(-)
7
tcg/tcg-op-ldst.c | 38 ++++++++++++++++++++---------
12
8
accel/tcg/atomic_common.c.inc | 14 +++++------
13
diff --git a/target/avr/helper.c b/target/avr/helper.c
9
3 files changed, 57 insertions(+), 41 deletions(-)
10
11
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
14
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
15
--- a/target/avr/helper.c
13
--- a/accel/tcg/tcg-runtime.h
16
+++ b/target/avr/helper.c
14
+++ b/accel/tcg/tcg-runtime.h
17
@@ -XXX,XX +XXX,XX @@ bool avr_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
15
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, i64, i32)
18
MMUAccessType access_type, int mmu_idx,
16
DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32)
19
bool probe, uintptr_t retaddr)
17
20
{
18
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
21
- int prot = 0;
19
- i32, env, tl, i32, i32, i32)
22
- MemTxAttrs attrs = {};
20
+ i32, env, i64, i32, i32, i32)
23
+ int prot, page_size = TARGET_PAGE_SIZE;
21
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
24
uint32_t paddr;
22
- i32, env, tl, i32, i32, i32)
25
23
+ i32, env, i64, i32, i32, i32)
26
address &= TARGET_PAGE_MASK;
24
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_le, TCG_CALL_NO_WG,
27
25
- i32, env, tl, i32, i32, i32)
28
if (mmu_idx == MMU_CODE_IDX) {
26
+ i32, env, i64, i32, i32, i32)
29
- /* access to code in flash */
27
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_be, TCG_CALL_NO_WG,
30
+ /* Access to code in flash. */
28
- i32, env, tl, i32, i32, i32)
31
paddr = OFFSET_CODE + address;
29
+ i32, env, i64, i32, i32, i32)
32
prot = PAGE_READ | PAGE_EXEC;
30
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_le, TCG_CALL_NO_WG,
33
- if (paddr + TARGET_PAGE_SIZE > OFFSET_DATA) {
31
- i32, env, tl, i32, i32, i32)
34
+ if (paddr >= OFFSET_DATA) {
32
+ i32, env, i64, i32, i32, i32)
35
+ /*
33
#ifdef CONFIG_ATOMIC64
36
+ * This should not be possible via any architectural operations.
34
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
37
+ * There is certainly not an exception that we can deliver.
35
- i64, env, tl, i64, i64, i32)
38
+ * Accept probing that might come from generic code.
36
+ i64, env, i64, i64, i64, i32)
39
+ */
37
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
40
+ if (probe) {
38
- i64, env, tl, i64, i64, i32)
41
+ return false;
39
+ i64, env, i64, i64, i64, i32)
42
+ }
40
#endif
43
error_report("execution left flash memory");
41
#ifdef CONFIG_CMPXCHG128
44
abort();
42
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG,
43
- i128, env, tl, i128, i128, i32)
44
+ i128, env, i64, i128, i128, i32)
45
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG,
46
- i128, env, tl, i128, i128, i32)
47
+ i128, env, i64, i128, i128, i32)
48
#endif
49
50
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_be, TCG_CALL_NO_WG,
51
- i128, env, tl, i128, i128, i32)
52
+ i128, env, i64, i128, i128, i32)
53
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_le, TCG_CALL_NO_WG,
54
- i128, env, tl, i128, i128, i32)
55
+ i128, env, i64, i128, i128, i32)
56
57
#ifdef CONFIG_ATOMIC64
58
#define GEN_ATOMIC_HELPERS(NAME) \
59
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
60
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
61
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
62
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
63
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
64
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
65
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
66
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
67
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
68
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
69
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
70
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
71
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
72
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
73
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
74
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_le), \
75
- TCG_CALL_NO_WG, i64, env, tl, i64, i32) \
76
+ TCG_CALL_NO_WG, i64, env, i64, i64, i32) \
77
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_be), \
78
- TCG_CALL_NO_WG, i64, env, tl, i64, i32)
79
+ TCG_CALL_NO_WG, i64, env, i64, i64, i32)
80
#else
81
#define GEN_ATOMIC_HELPERS(NAME) \
82
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
83
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
84
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
85
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
86
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
87
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
88
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
89
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
90
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
91
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
92
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
93
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
94
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
95
- TCG_CALL_NO_WG, i32, env, tl, i32, i32)
96
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32)
97
#endif /* CONFIG_ATOMIC64 */
98
99
GEN_ATOMIC_HELPERS(fetch_add)
100
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/tcg/tcg-op-ldst.c
103
+++ b/tcg/tcg-op-ldst.c
104
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
105
}
106
}
107
108
-typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
109
+typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
110
TCGv_i32, TCGv_i32, TCGv_i32);
111
-typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
112
+typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
113
TCGv_i64, TCGv_i64, TCGv_i32);
114
-typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
115
+typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
116
TCGv_i128, TCGv_i128, TCGv_i32);
117
-typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
118
+typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
119
TCGv_i32, TCGv_i32);
120
-typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
121
+typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
122
TCGv_i64, TCGv_i32);
123
124
#ifdef CONFIG_ATOMIC64
125
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
126
TCGv_i32 newv, TCGArg idx, MemOp memop)
127
{
128
gen_atomic_cx_i32 gen;
129
+ TCGv_i64 a64;
130
MemOpIdx oi;
131
132
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
133
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
134
tcg_debug_assert(gen != NULL);
135
136
oi = make_memop_idx(memop & ~MO_SIGN, idx);
137
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
138
+ a64 = maybe_extend_addr64(addr);
139
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
140
+ maybe_free_addr64(a64);
141
142
if (memop & MO_SIGN) {
143
tcg_gen_ext_i32(retv, retv, memop);
144
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
145
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
146
if (gen) {
147
MemOpIdx oi = make_memop_idx(memop, idx);
148
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
149
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
150
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
151
+ maybe_free_addr64(a64);
152
return;
45
}
153
}
46
- } else if (address < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) {
154
47
- /*
155
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
48
- * access to CPU registers, exit and rebuilt this TB to use full access
156
? gen_helper_nonatomic_cmpxchgo_le
49
- * incase it touches specially handled registers like SREG or SP
157
: gen_helper_nonatomic_cmpxchgo_be);
50
- */
158
MemOpIdx oi = make_memop_idx(memop, idx);
51
- AVRCPU *cpu = AVR_CPU(cs);
159
+ TCGv_i64 a64;
52
- CPUAVRState *env = &cpu->env;
160
53
- env->fullacc = 1;
161
tcg_debug_assert((memop & MO_SIZE) == MO_128);
54
- cpu_loop_exit_restore(cs, retaddr);
162
tcg_debug_assert((memop & MO_SIGN) == 0);
163
164
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
165
+ a64 = maybe_extend_addr64(addr);
166
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
167
+ maybe_free_addr64(a64);
55
} else {
168
} else {
56
- /* access to memory. nothing special */
169
TCGv_i128 oldv = tcg_temp_ebb_new_i128();
57
+ /* Access to memory. */
170
TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
58
paddr = OFFSET_DATA + address;
171
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
59
prot = PAGE_READ | PAGE_WRITE;
172
60
+ if (address < NUMBER_OF_CPU_REGISTERS + NUMBER_OF_IO_REGISTERS) {
173
if (gen) {
61
+ /*
174
MemOpIdx oi = make_memop_idx(memop, idx);
62
+ * Access to CPU registers, exit and rebuilt this TB to use
175
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
63
+ * full access in case it touches specially handled registers
176
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
64
+ * like SREG or SP. For probing, set page_size = 1, in order
177
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
65
+ * to force tlb_fill to be called for the next access.
178
+ maybe_free_addr64(a64);
66
+ */
179
return;
67
+ if (probe) {
68
+ page_size = 1;
69
+ } else {
70
+ AVRCPU *cpu = AVR_CPU(cs);
71
+ CPUAVRState *env = &cpu->env;
72
+ env->fullacc = 1;
73
+ cpu_loop_exit_restore(cs, retaddr);
74
+ }
75
+ }
76
}
180
}
77
181
78
- tlb_set_page_with_attrs(cs, address, paddr, attrs, prot,
182
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
79
- mmu_idx, TARGET_PAGE_SIZE);
183
TCGArg idx, MemOp memop, void * const table[])
80
-
184
{
81
+ tlb_set_page(cs, address, paddr, prot, mmu_idx, page_size);
185
gen_atomic_op_i32 gen;
82
return true;
186
+ TCGv_i64 a64;
83
}
187
MemOpIdx oi;
188
189
memop = tcg_canonicalize_memop(memop, 0, 0);
190
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
191
tcg_debug_assert(gen != NULL);
192
193
oi = make_memop_idx(memop & ~MO_SIGN, idx);
194
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
195
+ a64 = maybe_extend_addr64(addr);
196
+ gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
197
+ maybe_free_addr64(a64);
198
199
if (memop & MO_SIGN) {
200
tcg_gen_ext_i32(ret, ret, memop);
201
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
202
if ((memop & MO_SIZE) == MO_64) {
203
#ifdef CONFIG_ATOMIC64
204
gen_atomic_op_i64 gen;
205
+ TCGv_i64 a64;
206
MemOpIdx oi;
207
208
gen = table[memop & (MO_SIZE | MO_BSWAP)];
209
tcg_debug_assert(gen != NULL);
210
211
oi = make_memop_idx(memop & ~MO_SIGN, idx);
212
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
213
+ a64 = maybe_extend_addr64(addr);
214
+ gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
215
+ maybe_free_addr64(a64);
216
#else
217
gen_helper_exit_atomic(cpu_env);
218
/* Produce a result, so that we have a well-formed opcode stream
219
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
220
index XXXXXXX..XXXXXXX 100644
221
--- a/accel/tcg/atomic_common.c.inc
222
+++ b/accel/tcg/atomic_common.c.inc
223
@@ -XXX,XX +XXX,XX @@
224
* See the COPYING file in the top-level directory.
225
*/
226
227
-static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr,
228
+static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
229
MemOpIdx oi)
230
{
231
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW);
232
}
233
234
#if HAVE_ATOMIC128
235
-static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr,
236
+static void atomic_trace_ld_post(CPUArchState *env, uint64_t addr,
237
MemOpIdx oi)
238
{
239
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
240
}
241
242
-static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
243
+static void atomic_trace_st_post(CPUArchState *env, uint64_t addr,
244
MemOpIdx oi)
245
{
246
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
247
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
248
*/
249
250
#define CMPXCHG_HELPER(OP, TYPE) \
251
- TYPE HELPER(atomic_##OP)(CPUArchState *env, target_ulong addr, \
252
+ TYPE HELPER(atomic_##OP)(CPUArchState *env, uint64_t addr, \
253
TYPE oldv, TYPE newv, uint32_t oi) \
254
{ return cpu_atomic_##OP##_mmu(env, addr, oldv, newv, oi, GETPC()); }
255
256
@@ -XXX,XX +XXX,XX @@ CMPXCHG_HELPER(cmpxchgo_le, Int128)
257
258
#undef CMPXCHG_HELPER
259
260
-Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
261
+Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, uint64_t addr,
262
Int128 cmpv, Int128 newv, uint32_t oi)
263
{
264
#if TCG_TARGET_REG_BITS == 32
265
@@ -XXX,XX +XXX,XX @@ Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
266
#endif
267
}
268
269
-Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
270
+Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, uint64_t addr,
271
Int128 cmpv, Int128 newv, uint32_t oi)
272
{
273
#if TCG_TARGET_REG_BITS == 32
274
@@ -XXX,XX +XXX,XX @@ Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
275
}
276
277
#define ATOMIC_HELPER(OP, TYPE) \
278
- TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, target_ulong addr, \
279
+ TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, uint64_t addr, \
280
TYPE val, uint32_t oi) \
281
{ return glue(glue(cpu_atomic_,OP),_mmu)(env, addr, val, oi, GETPC()); }
84
282
85
--
283
--
86
2.34.1
284
2.34.1
87
285
88
286
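On the probe handling added to the AVR fault path above: the contract is
that a nonfaulting probe reports failure to its caller, while a real access
delivers the fault. A small self-contained C sketch of that contract, with
every name (translate_page, raise_fault, the validity check) invented for
illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Invented stand-in for delivering a guest fault; never returns. */
static void raise_fault(uint32_t addr)
{
    fprintf(stderr, "fault at 0x%x\n", (unsigned)addr);
    exit(1);
}

/* Translate addr.  On failure: return false if only probing, otherwise
 * deliver the fault. */
static bool translate_page(uint32_t addr, bool probe, uint32_t *paddr)
{
    bool valid = addr < 0x10000;        /* invented validity check */

    if (!valid) {
        if (probe) {
            return false;               /* let generic code see the miss */
        }
        raise_fault(addr);              /* a real access must fault */
    }
    *paddr = addr;                      /* identity mapping for the sketch */
    return true;
}

int main(void)
{
    uint32_t paddr;
    bool hit = translate_page(0x20000, true, &paddr);   /* probe: no fault */
    printf("probe %s\n", hit ? "hit" : "missed");
    return 0;
}

The page_size = 1 trick in the same hunk serves a related purpose: it keeps
the probed I/O-register page from being cached, so the next real access goes
back through tlb_fill.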
New patch

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h | 2 +-
 tcg/tcg.c         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
9
index XXXXXXX..XXXXXXX 100644
10
--- a/include/tcg/tcg.h
11
+++ b/include/tcg/tcg.h
12
@@ -XXX,XX +XXX,XX @@ void tcg_register_thread(void);
13
void tcg_prologue_init(TCGContext *s);
14
void tcg_func_start(TCGContext *s);
15
16
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start);
17
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start);
18
19
void tb_target_set_jmp_target(const TranslationBlock *, int,
20
uintptr_t, uintptr_t);
21
diff --git a/tcg/tcg.c b/tcg/tcg.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/tcg/tcg.c
24
+++ b/tcg/tcg.c
25
@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void)
26
#endif
27
28
29
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
30
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
31
{
32
#ifdef CONFIG_PROFILER
33
TCGProfile *prof = &s->prof;
34
--
35
2.34.1
36
37
New patch

As gen_mem_wrapped is only used in plugin_gen_empty_mem_callback,
we can avoid the curiosity of union mem_gen_fn by inlining it.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/plugin-gen.c | 30 ++++++------------------------
 1 file changed, 6 insertions(+), 24 deletions(-)

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/accel/tcg/plugin-gen.c
13
+++ b/accel/tcg/plugin-gen.c
14
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_empty_callback(enum plugin_gen_from from)
15
}
16
}
17
18
-union mem_gen_fn {
19
- void (*mem_fn)(TCGv, uint32_t);
20
- void (*inline_fn)(void);
21
-};
22
-
23
-static void gen_mem_wrapped(enum plugin_gen_cb type,
24
- const union mem_gen_fn *f, TCGv addr,
25
- uint32_t info, bool is_mem)
26
+void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
27
{
28
enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
29
30
- gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, rw);
31
- if (is_mem) {
32
- f->mem_fn(addr, info);
33
- } else {
34
- f->inline_fn();
35
- }
36
+ gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, PLUGIN_GEN_CB_MEM, rw);
37
+ gen_empty_mem_cb(addr, info);
38
tcg_gen_plugin_cb_end();
39
-}
40
41
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
42
-{
43
- union mem_gen_fn fn;
44
-
45
- fn.mem_fn = gen_empty_mem_cb;
46
- gen_mem_wrapped(PLUGIN_GEN_CB_MEM, &fn, addr, info, true);
47
-
48
- fn.inline_fn = gen_empty_inline_cb;
49
- gen_mem_wrapped(PLUGIN_GEN_CB_INLINE, &fn, 0, info, false);
50
+ gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, PLUGIN_GEN_CB_INLINE, rw);
51
+ gen_empty_inline_cb();
52
+ tcg_gen_plugin_cb_end();
53
}
54
55
static TCGOp *find_op(TCGOp *op, TCGOpcode opc)
56
--
57
2.34.1
58
59
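For readers wondering what the removed union mem_gen_fn looked like in the
abstract: it funnelled two unrelated callback signatures through one wrapper
that then needed a flag to pick the right member. With a single caller the
wrapper buys nothing, which is why the patch writes the two call sequences
out directly. A generic sketch of the shape (all names invented, not the
QEMU code):

#include <stdbool.h>
#include <stdio.h>

union gen_fn {
    void (*mem_fn)(int addr);
    void (*inline_fn)(void);
};

static void mem_cb(int addr)  { printf("mem cb at %d\n", addr); }
static void inline_cb(void)   { printf("inline cb\n"); }

/* The indirection being removed: a flag selects which member is valid. */
static void wrapper(const union gen_fn *f, int addr, bool is_mem)
{
    if (is_mem) {
        f->mem_fn(addr);
    } else {
        f->inline_fn();
    }
}

int main(void)
{
    union gen_fn f;

    /* Before: every call goes through the union and the flag. */
    f.mem_fn = mem_cb;
    wrapper(&f, 42, true);
    f.inline_fn = inline_cb;
    wrapper(&f, 0, false);

    /* After: the lone caller simply calls each callback directly. */
    mem_cb(42);
    inline_cb();
    return 0;
}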
New patch

As do_gen_mem_cb is called once, merge it into gen_empty_mem_cb.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/plugin-gen.c | 39 +++++++++++++++++----------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/accel/tcg/plugin-gen.c
12
+++ b/accel/tcg/plugin-gen.c
13
@@ -XXX,XX +XXX,XX @@ void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index,
14
void *userdata)
15
{ }
16
17
-static void do_gen_mem_cb(TCGv vaddr, uint32_t info)
18
-{
19
- TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
20
- TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
21
- TCGv_i64 vaddr64 = tcg_temp_ebb_new_i64();
22
- TCGv_ptr udata = tcg_temp_ebb_new_ptr();
23
-
24
- tcg_gen_movi_i32(meminfo, info);
25
- tcg_gen_movi_ptr(udata, 0);
26
- tcg_gen_ld_i32(cpu_index, cpu_env,
27
- -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
28
- tcg_gen_extu_tl_i64(vaddr64, vaddr);
29
-
30
- gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, vaddr64, udata);
31
-
32
- tcg_temp_free_ptr(udata);
33
- tcg_temp_free_i64(vaddr64);
34
- tcg_temp_free_i32(meminfo);
35
- tcg_temp_free_i32(cpu_index);
36
-}
37
-
38
static void gen_empty_udata_cb(void)
39
{
40
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
41
@@ -XXX,XX +XXX,XX @@ static void gen_empty_inline_cb(void)
42
43
static void gen_empty_mem_cb(TCGv addr, uint32_t info)
44
{
45
- do_gen_mem_cb(addr, info);
46
+ TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
47
+ TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
48
+ TCGv_i64 addr64 = tcg_temp_ebb_new_i64();
49
+ TCGv_ptr udata = tcg_temp_ebb_new_ptr();
50
+
51
+ tcg_gen_movi_i32(meminfo, info);
52
+ tcg_gen_movi_ptr(udata, 0);
53
+ tcg_gen_ld_i32(cpu_index, cpu_env,
54
+ -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
55
+ tcg_gen_extu_tl_i64(addr64, addr);
56
+
57
+ gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr64, udata);
58
+
59
+ tcg_temp_free_ptr(udata);
60
+ tcg_temp_free_i64(addr64);
61
+ tcg_temp_free_i32(meminfo);
62
+ tcg_temp_free_i32(cpu_index);
63
}
64
65
/*
66
--
67
2.34.1
68
69
1
We cannot deliver two interrupts simultaneously;
1
We only need to make copies for loads, when the destination
2
the first interrupt handler must execute first.
2
overlaps the address. For now, only eliminate the copy for
3
stores and 128-bit loads.
3
4
4
Reviewed-by: Michael Rolnik <mrolnik@gmail.com>
5
Rename plugin_prep_mem_callbacks to plugin_maybe_preserve_addr,
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
returning NULL if no copy is made.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
10
---
8
target/avr/helper.c | 9 +++------
11
tcg/tcg-op-ldst.c | 38 ++++++++++++++++++++------------------
9
1 file changed, 3 insertions(+), 6 deletions(-)
12
1 file changed, 20 insertions(+), 18 deletions(-)
10
13
11
diff --git a/target/avr/helper.c b/target/avr/helper.c
14
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/target/avr/helper.c
16
--- a/tcg/tcg-op-ldst.c
14
+++ b/target/avr/helper.c
17
+++ b/tcg/tcg-op-ldst.c
15
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)
16
19
}
17
bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
20
}
21
22
-static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
23
+/* Only required for loads, where value might overlap addr. */
24
+static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
18
{
25
{
19
- bool ret = false;
26
#ifdef CONFIG_PLUGIN
20
AVRCPU *cpu = AVR_CPU(cs);
27
if (tcg_ctx->plugin_insn != NULL) {
21
CPUAVRState *env = &cpu->env;
28
@@ -XXX,XX +XXX,XX @@ static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
22
29
return temp;
23
@@ -XXX,XX +XXX,XX @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
30
}
24
avr_cpu_do_interrupt(cs);
31
#endif
25
32
- return vaddr;
26
cs->interrupt_request &= ~CPU_INTERRUPT_RESET;
33
+ return NULL;
27
-
34
}
28
- ret = true;
35
29
+ return true;
36
-static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
37
- enum qemu_plugin_mem_rw rw)
38
+static void
39
+plugin_gen_mem_callbacks(TCGv copy_addr, TCGv orig_addr, MemOpIdx oi,
40
+ enum qemu_plugin_mem_rw rw)
41
{
42
#ifdef CONFIG_PLUGIN
43
if (tcg_ctx->plugin_insn != NULL) {
44
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
45
- plugin_gen_empty_mem_callback(vaddr, info);
46
- tcg_temp_free(vaddr);
47
+ plugin_gen_empty_mem_callback(copy_addr ? : orig_addr, info);
48
+ if (copy_addr) {
49
+ tcg_temp_free(copy_addr);
50
+ }
51
}
52
#endif
53
}
54
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
55
{
56
MemOp orig_memop;
57
MemOpIdx oi;
58
+ TCGv copy_addr;
59
60
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
61
memop = tcg_canonicalize_memop(memop, 0, 0);
62
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
30
}
63
}
31
}
64
}
32
if (interrupt_request & CPU_INTERRUPT_HARD) {
65
33
@@ -XXX,XX +XXX,XX @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
66
- addr = plugin_prep_mem_callbacks(addr);
34
if (!env->intsrc) {
67
+ copy_addr = plugin_maybe_preserve_addr(addr);
35
cs->interrupt_request &= ~CPU_INTERRUPT_HARD;
68
gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
36
}
69
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
37
-
70
+ plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
38
- ret = true;
71
39
+ return true;
72
if ((orig_memop ^ memop) & MO_BSWAP) {
73
switch (orig_memop & MO_SIZE) {
74
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
75
memop &= ~MO_BSWAP;
76
}
77
78
- addr = plugin_prep_mem_callbacks(addr);
79
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
80
gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
81
} else {
82
gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
83
}
84
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
85
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
86
87
if (swap) {
88
tcg_temp_free_i32(swap);
89
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
90
{
91
MemOp orig_memop;
92
MemOpIdx oi;
93
+ TCGv copy_addr;
94
95
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
96
tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
97
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
40
}
98
}
41
}
99
}
42
- return ret;
100
43
+ return false;
101
- addr = plugin_prep_mem_callbacks(addr);
102
+ copy_addr = plugin_maybe_preserve_addr(addr);
103
gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
104
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
105
+ plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
106
107
if ((orig_memop ^ memop) & MO_BSWAP) {
108
int flags = (orig_memop & MO_SIGN
109
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
110
memop &= ~MO_BSWAP;
111
}
112
113
- addr = plugin_prep_mem_callbacks(addr);
114
gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
115
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
116
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
117
118
if (swap) {
119
tcg_temp_free_i64(swap);
120
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
121
tcg_debug_assert((memop & MO_SIGN) == 0);
122
123
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
124
- addr = plugin_prep_mem_callbacks(addr);
125
126
/* TODO: For now, force 32-bit hosts to use the helper. */
127
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
128
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
129
maybe_free_addr64(a64);
130
}
131
132
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
133
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_R);
44
}
134
}
45
135
46
void avr_cpu_do_interrupt(CPUState *cs)
136
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
137
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
138
tcg_debug_assert((memop & MO_SIGN) == 0);
139
140
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
141
- addr = plugin_prep_mem_callbacks(addr);
142
143
/* TODO: For now, force 32-bit hosts to use the helper. */
144
145
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
146
maybe_free_addr64(a64);
147
}
148
149
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
150
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
151
}
152
153
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
47
--
154
--
48
2.34.1
155
2.34.1
49
156
50
157
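A note on why the copy in plugin_maybe_preserve_addr is needed only for
loads: the destination of an i32 or i64 load may be the very TCG value that
holds the address, so once the load has executed the original address is
gone unless it was saved first; a store (and, as the message notes, a
128-bit load) has no such overlap. A plain-C analogue of that aliasing
hazard, with all names invented for illustration:

#include <stdint.h>
#include <stdio.h>

/* A "load" whose destination may alias its address operand. */
static void emulated_load(uint64_t *val, uint64_t *addr_reg,
                          const uint64_t *memory)
{
    *val = memory[*addr_reg];           /* clobbers *addr_reg if they alias */
}

int main(void)
{
    uint64_t memory[8] = { 0, 11, 22, 33, 44, 55, 66, 77 };
    uint64_t reg = 3;                   /* doubles as address and destination */

    uint64_t saved_addr = reg;          /* the "maybe preserve" step */
    emulated_load(&reg, &reg, memory);  /* destination overlaps address */

    /* A callback that still needs the address must use the saved copy. */
    printf("loaded %llu from address %llu\n",
           (unsigned long long)reg, (unsigned long long)saved_addr);
    return 0;
}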
New patch

Since the address is already widened to 64 bits inside gen_empty_mem_cb,
do the widening earlier, during tcg expansion.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/plugin-gen.h |  4 ++--
 accel/tcg/plugin-gen.c    |  9 +++------
 tcg/tcg-op-ldst.c         | 28 ++++++++++++++++++++--------
 3 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/exec/plugin-gen.h
15
+++ b/include/exec/plugin-gen.h
16
@@ -XXX,XX +XXX,XX @@ void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
17
void plugin_gen_insn_end(void);
18
19
void plugin_gen_disable_mem_helpers(void);
20
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info);
21
+void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info);
22
23
static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
24
{
25
@@ -XXX,XX +XXX,XX @@ static inline void plugin_gen_tb_end(CPUState *cpu)
26
static inline void plugin_gen_disable_mem_helpers(void)
27
{ }
28
29
-static inline void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
30
+static inline void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info)
31
{ }
32
33
static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
34
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/accel/tcg/plugin-gen.c
37
+++ b/accel/tcg/plugin-gen.c
38
@@ -XXX,XX +XXX,XX @@ static void gen_empty_inline_cb(void)
39
tcg_temp_free_i64(val);
40
}
41
42
-static void gen_empty_mem_cb(TCGv addr, uint32_t info)
43
+static void gen_empty_mem_cb(TCGv_i64 addr, uint32_t info)
44
{
45
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
46
TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
47
- TCGv_i64 addr64 = tcg_temp_ebb_new_i64();
48
TCGv_ptr udata = tcg_temp_ebb_new_ptr();
49
50
tcg_gen_movi_i32(meminfo, info);
51
tcg_gen_movi_ptr(udata, 0);
52
tcg_gen_ld_i32(cpu_index, cpu_env,
53
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
54
- tcg_gen_extu_tl_i64(addr64, addr);
55
56
- gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr64, udata);
57
+ gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr, udata);
58
59
tcg_temp_free_ptr(udata);
60
- tcg_temp_free_i64(addr64);
61
tcg_temp_free_i32(meminfo);
62
tcg_temp_free_i32(cpu_index);
63
}
64
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_empty_callback(enum plugin_gen_from from)
65
}
66
}
67
68
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
69
+void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info)
70
{
71
enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
72
73
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/tcg/tcg-op-ldst.c
76
+++ b/tcg/tcg-op-ldst.c
77
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)
78
}
79
80
/* Only required for loads, where value might overlap addr. */
81
-static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
82
+static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
83
{
84
#ifdef CONFIG_PLUGIN
85
if (tcg_ctx->plugin_insn != NULL) {
86
/* Save a copy of the vaddr for use after a load. */
87
- TCGv temp = tcg_temp_new();
88
- tcg_gen_mov_tl(temp, vaddr);
89
+ TCGv_i64 temp = tcg_temp_ebb_new_i64();
90
+ tcg_gen_extu_tl_i64(temp, vaddr);
91
return temp;
92
}
93
#endif
94
@@ -XXX,XX +XXX,XX @@ static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
95
}
96
97
static void
98
-plugin_gen_mem_callbacks(TCGv copy_addr, TCGv orig_addr, MemOpIdx oi,
99
+plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGv orig_addr, MemOpIdx oi,
100
enum qemu_plugin_mem_rw rw)
101
{
102
#ifdef CONFIG_PLUGIN
103
if (tcg_ctx->plugin_insn != NULL) {
104
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
105
- plugin_gen_empty_mem_callback(copy_addr ? : orig_addr, info);
106
+
107
+#if TARGET_LONG_BITS == 64
108
if (copy_addr) {
109
- tcg_temp_free(copy_addr);
110
+ plugin_gen_empty_mem_callback(copy_addr, info);
111
+ tcg_temp_free_i64(copy_addr);
112
+ } else {
113
+ plugin_gen_empty_mem_callback(orig_addr, info);
114
}
115
+#else
116
+ if (!copy_addr) {
117
+ copy_addr = tcg_temp_ebb_new_i64();
118
+ tcg_gen_extu_tl_i64(copy_addr, orig_addr);
119
+ }
120
+ plugin_gen_empty_mem_callback(copy_addr, info);
121
+ tcg_temp_free_i64(copy_addr);
122
+#endif
123
}
124
#endif
125
}
126
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
127
{
128
MemOp orig_memop;
129
MemOpIdx oi;
130
- TCGv copy_addr;
131
+ TCGv_i64 copy_addr;
132
133
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
134
memop = tcg_canonicalize_memop(memop, 0, 0);
135
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
136
{
137
MemOp orig_memop;
138
MemOpIdx oi;
139
- TCGv copy_addr;
140
+ TCGv_i64 copy_addr;
141
142
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
143
tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
144
--
145
2.34.1
146
147
New patch

Record the address type (TCG_TYPE_I32 or TCG_TYPE_I64) in TCGContext;
this will enable replacement of TARGET_LONG_BITS within tcg/.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h         | 1 +
 accel/tcg/translate-all.c | 2 ++
 tcg/tcg.c                 | 3 +++
 3 files changed, 6 insertions(+)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/include/tcg/tcg.h
14
+++ b/include/tcg/tcg.h
15
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
16
int nb_temps;
17
int nb_indirects;
18
int nb_ops;
19
+ TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */
20
21
TCGRegSet reserved_regs;
22
intptr_t current_frame_offset;
23
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/accel/tcg/translate-all.c
26
+++ b/accel/tcg/translate-all.c
27
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
28
tb_set_page_addr0(tb, phys_pc);
29
tb_set_page_addr1(tb, -1);
30
tcg_ctx->gen_tb = tb;
31
+ tcg_ctx->addr_type = TCG_TYPE_TL;
32
+
33
tb_overflow:
34
35
#ifdef CONFIG_PROFILER
36
diff --git a/tcg/tcg.c b/tcg/tcg.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/tcg/tcg.c
39
+++ b/tcg/tcg.c
40
@@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s)
41
QTAILQ_INIT(&s->ops);
42
QTAILQ_INIT(&s->free_ops);
43
QSIMPLEQ_INIT(&s->labels);
44
+
45
+ tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
46
+ s->addr_type == TCG_TYPE_I64);
47
}
48
49
static TCGTemp *tcg_temp_alloc(TCGContext *s)
50
--
51
2.34.1
52
53
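The addr_type field introduced above follows a familiar pattern: record a
per-translation property in the context once, then let generic code assert
and consult it instead of reaching for a compile-time constant such as
TARGET_LONG_BITS. A minimal generic sketch of that pattern (ctx_t,
addr_type_t and the helpers are invented names, not QEMU interfaces):

#include <assert.h>
#include <stdbool.h>

typedef enum { ADDR_I32, ADDR_I64 } addr_type_t;

typedef struct {
    addr_type_t addr_type;      /* set by the frontend before translation */
} ctx_t;

static void func_start(ctx_t *ctx)
{
    /* Generic code trusts the field rather than a build-time constant. */
    assert(ctx->addr_type == ADDR_I32 || ctx->addr_type == ADDR_I64);
}

static bool addr_is_64(const ctx_t *ctx)
{
    return ctx->addr_type == ADDR_I64;
}

int main(void)
{
    ctx_t ctx = { .addr_type = ADDR_I64 };
    func_start(&ctx);
    return addr_is_64(&ctx) ? 0 : 1;
}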
New patch

Expand from TCGv to TCGTemp inline in the translators,
and validate that the size matches tcg_ctx->addr_type.
These inlines will eventually be seen only by target-specific code.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-op.h |  50 ++++++-
 tcg/tcg-op-ldst.c    | 343 ++++++++++++++++++++++++++-----------------
 2 files changed, 251 insertions(+), 142 deletions(-)

diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/tcg/tcg-op.h
15
+++ b/include/tcg/tcg-op.h
16
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_plugin_cb_end(void)
17
#define tcg_temp_new() tcg_temp_new_i32()
18
#define tcg_global_mem_new tcg_global_mem_new_i32
19
#define tcg_temp_free tcg_temp_free_i32
20
+#define tcgv_tl_temp tcgv_i32_temp
21
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
22
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
23
#else
24
#define tcg_temp_new() tcg_temp_new_i64()
25
#define tcg_global_mem_new tcg_global_mem_new_i64
26
#define tcg_temp_free tcg_temp_free_i64
27
+#define tcgv_tl_temp tcgv_i64_temp
28
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
29
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
30
#endif
31
32
-void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
33
-void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
34
-void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
35
-void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
36
-void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
37
-void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
38
+void tcg_gen_qemu_ld_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
39
+void tcg_gen_qemu_st_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
40
+void tcg_gen_qemu_ld_i64_chk(TCGv_i64, TCGTemp *, TCGArg, MemOp, TCGType);
41
+void tcg_gen_qemu_st_i64_chk(TCGv_i64, TCGTemp *, TCGArg, MemOp, TCGType);
42
+void tcg_gen_qemu_ld_i128_chk(TCGv_i128, TCGTemp *, TCGArg, MemOp, TCGType);
43
+void tcg_gen_qemu_st_i128_chk(TCGv_i128, TCGTemp *, TCGArg, MemOp, TCGType);
44
+
45
+static inline void
46
+tcg_gen_qemu_ld_i32(TCGv_i32 v, TCGv a, TCGArg i, MemOp m)
47
+{
48
+ tcg_gen_qemu_ld_i32_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
49
+}
50
+
51
+static inline void
52
+tcg_gen_qemu_st_i32(TCGv_i32 v, TCGv a, TCGArg i, MemOp m)
53
+{
54
+ tcg_gen_qemu_st_i32_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
55
+}
56
+
57
+static inline void
58
+tcg_gen_qemu_ld_i64(TCGv_i64 v, TCGv a, TCGArg i, MemOp m)
59
+{
60
+ tcg_gen_qemu_ld_i64_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
61
+}
62
+
63
+static inline void
64
+tcg_gen_qemu_st_i64(TCGv_i64 v, TCGv a, TCGArg i, MemOp m)
65
+{
66
+ tcg_gen_qemu_st_i64_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
67
+}
68
+
69
+static inline void
70
+tcg_gen_qemu_ld_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
71
+{
72
+ tcg_gen_qemu_ld_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
73
+}
74
+
75
+static inline void
76
+tcg_gen_qemu_st_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
77
+{
78
+ tcg_gen_qemu_st_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
79
+}
80
81
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
82
TCGArg, MemOp);
83
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/tcg/tcg-op-ldst.c
86
+++ b/tcg/tcg-op-ldst.c
87
@@ -XXX,XX +XXX,XX @@ static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
88
return op;
89
}
90
91
-static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
92
- MemOp memop, TCGArg idx)
93
+static void gen_ldst(TCGOpcode opc, TCGTemp *vl, TCGTemp *vh,
94
+ TCGTemp *addr, MemOpIdx oi)
95
{
96
- MemOpIdx oi = make_memop_idx(memop, idx);
97
-#if TARGET_LONG_BITS == 32
98
- tcg_gen_op3i_i32(opc, val, addr, oi);
99
-#else
100
- if (TCG_TARGET_REG_BITS == 32) {
101
- tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
102
+ if (TCG_TARGET_REG_BITS == 64 || tcg_ctx->addr_type == TCG_TYPE_I32) {
103
+ if (vh) {
104
+ tcg_gen_op4(opc, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi);
105
+ } else {
106
+ tcg_gen_op3(opc, temp_arg(vl), temp_arg(addr), oi);
107
+ }
108
} else {
109
- tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
110
+ /* See TCGV_LOW/HIGH. */
111
+ TCGTemp *al = addr + HOST_BIG_ENDIAN;
112
+ TCGTemp *ah = addr + !HOST_BIG_ENDIAN;
113
+
114
+ if (vh) {
115
+ tcg_gen_op5(opc, temp_arg(vl), temp_arg(vh),
116
+ temp_arg(al), temp_arg(ah), oi);
117
+ } else {
118
+ tcg_gen_op4(opc, temp_arg(vl), temp_arg(al), temp_arg(ah), oi);
119
+ }
120
}
121
-#endif
122
}
123
124
-static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
125
- MemOp memop, TCGArg idx)
126
+static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi)
127
{
128
- MemOpIdx oi = make_memop_idx(memop, idx);
129
-#if TARGET_LONG_BITS == 32
130
if (TCG_TARGET_REG_BITS == 32) {
131
- tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
132
+ TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v));
133
+ TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v));
134
+ gen_ldst(opc, vl, vh, addr, oi);
135
} else {
136
- tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
137
+ gen_ldst(opc, tcgv_i64_temp(v), NULL, addr, oi);
138
}
139
-#else
140
- if (TCG_TARGET_REG_BITS == 32) {
141
- tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
142
- TCGV_LOW(addr), TCGV_HIGH(addr), oi);
143
- } else {
144
- tcg_gen_op3i_i64(opc, val, addr, oi);
145
- }
146
-#endif
147
}
148
149
static void tcg_gen_req_mo(TCGBar type)
150
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)
151
}
152
153
/* Only required for loads, where value might overlap addr. */
154
-static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
155
+static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
156
{
157
#ifdef CONFIG_PLUGIN
158
if (tcg_ctx->plugin_insn != NULL) {
159
/* Save a copy of the vaddr for use after a load. */
160
TCGv_i64 temp = tcg_temp_ebb_new_i64();
161
- tcg_gen_extu_tl_i64(temp, vaddr);
162
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
163
+ tcg_gen_extu_i32_i64(temp, temp_tcgv_i32(addr));
164
+ } else {
165
+ tcg_gen_mov_i64(temp, temp_tcgv_i64(addr));
166
+ }
167
return temp;
168
}
169
#endif
170
@@ -XXX,XX +XXX,XX @@ static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
171
}
172
173
static void
174
-plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGv orig_addr, MemOpIdx oi,
175
+plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
176
enum qemu_plugin_mem_rw rw)
177
{
178
#ifdef CONFIG_PLUGIN
179
if (tcg_ctx->plugin_insn != NULL) {
180
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
181
182
-#if TARGET_LONG_BITS == 64
183
- if (copy_addr) {
184
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
185
+ if (!copy_addr) {
186
+ copy_addr = tcg_temp_ebb_new_i64();
187
+ tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr));
188
+ }
189
plugin_gen_empty_mem_callback(copy_addr, info);
190
tcg_temp_free_i64(copy_addr);
191
} else {
192
- plugin_gen_empty_mem_callback(orig_addr, info);
193
+ if (copy_addr) {
194
+ plugin_gen_empty_mem_callback(copy_addr, info);
195
+ tcg_temp_free_i64(copy_addr);
196
+ } else {
197
+ plugin_gen_empty_mem_callback(temp_tcgv_i64(orig_addr), info);
198
+ }
199
}
200
-#else
201
- if (!copy_addr) {
202
- copy_addr = tcg_temp_ebb_new_i64();
203
- tcg_gen_extu_tl_i64(copy_addr, orig_addr);
204
- }
205
- plugin_gen_empty_mem_callback(copy_addr, info);
206
- tcg_temp_free_i64(copy_addr);
207
-#endif
208
}
209
#endif
210
}
211
212
-void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
213
+static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
214
+ TCGArg idx, MemOp memop)
215
{
216
MemOp orig_memop;
217
- MemOpIdx oi;
218
+ MemOpIdx orig_oi, oi;
219
TCGv_i64 copy_addr;
220
221
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
222
- memop = tcg_canonicalize_memop(memop, 0, 0);
223
- oi = make_memop_idx(memop, idx);
224
+ orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
225
+ orig_oi = oi = make_memop_idx(memop, idx);
226
227
- orig_memop = memop;
228
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
229
memop &= ~MO_BSWAP;
230
/* The bswap primitive benefits from zero-extended input. */
231
if ((memop & MO_SSIZE) == MO_SW) {
232
memop &= ~MO_SIGN;
233
}
234
+ oi = make_memop_idx(memop, idx);
235
}
236
237
copy_addr = plugin_maybe_preserve_addr(addr);
238
- gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
239
- plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
240
+ gen_ldst(INDEX_op_qemu_ld_i32, tcgv_i32_temp(val), NULL, addr, oi);
241
+ plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
242
243
if ((orig_memop ^ memop) & MO_BSWAP) {
244
switch (orig_memop & MO_SIZE) {
245
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
246
}
247
}
248
249
-void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
250
+void tcg_gen_qemu_ld_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
251
+ MemOp memop, TCGType addr_type)
252
+{
253
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
254
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
255
+ tcg_gen_qemu_ld_i32_int(val, addr, idx, memop);
256
+}
257
+
258
+static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
259
+ TCGArg idx, MemOp memop)
260
{
261
TCGv_i32 swap = NULL;
262
- MemOpIdx oi;
263
+ MemOpIdx orig_oi, oi;
264
+ TCGOpcode opc;
265
266
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
267
memop = tcg_canonicalize_memop(memop, 0, 1);
268
- oi = make_memop_idx(memop, idx);
269
+ orig_oi = oi = make_memop_idx(memop, idx);
270
271
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
272
swap = tcg_temp_ebb_new_i32();
273
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
274
}
275
val = swap;
276
memop &= ~MO_BSWAP;
277
+ oi = make_memop_idx(memop, idx);
278
}
279
280
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
281
- gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
282
+ opc = INDEX_op_qemu_st8_i32;
283
} else {
284
- gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
285
+ opc = INDEX_op_qemu_st_i32;
286
}
287
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
288
+ gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
289
+ plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
290
291
if (swap) {
292
tcg_temp_free_i32(swap);
293
}
294
}
295
296
-void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
297
+void tcg_gen_qemu_st_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
298
+ MemOp memop, TCGType addr_type)
299
+{
300
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
301
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
302
+ tcg_gen_qemu_st_i32_int(val, addr, idx, memop);
303
+}
304
+
305
+static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
306
+ TCGArg idx, MemOp memop)
307
{
308
MemOp orig_memop;
309
- MemOpIdx oi;
310
+ MemOpIdx orig_oi, oi;
311
TCGv_i64 copy_addr;
312
313
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
314
- tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
315
+ tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
316
if (memop & MO_SIGN) {
317
tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
318
} else {
319
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
320
}
321
322
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
323
- memop = tcg_canonicalize_memop(memop, 1, 0);
324
- oi = make_memop_idx(memop, idx);
325
+ orig_memop = memop = tcg_canonicalize_memop(memop, 1, 0);
326
+ orig_oi = oi = make_memop_idx(memop, idx);
327
328
- orig_memop = memop;
329
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
330
memop &= ~MO_BSWAP;
331
/* The bswap primitive benefits from zero-extended input. */
332
if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
333
memop &= ~MO_SIGN;
334
}
335
+ oi = make_memop_idx(memop, idx);
336
}
337
338
copy_addr = plugin_maybe_preserve_addr(addr);
339
- gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
340
- plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
341
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, oi);
342
+ plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
343
344
if ((orig_memop ^ memop) & MO_BSWAP) {
345
int flags = (orig_memop & MO_SIGN
346
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
347
}
348
}
349
350
-void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
351
+void tcg_gen_qemu_ld_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
352
+ MemOp memop, TCGType addr_type)
353
+{
354
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
355
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
356
+ tcg_gen_qemu_ld_i64_int(val, addr, idx, memop);
357
+}
358
+
359
+static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
360
+ TCGArg idx, MemOp memop)
361
{
362
TCGv_i64 swap = NULL;
363
- MemOpIdx oi;
364
+ MemOpIdx orig_oi, oi;
365
366
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
367
- tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
368
+ tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
369
return;
370
}
371
372
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
373
memop = tcg_canonicalize_memop(memop, 1, 1);
374
- oi = make_memop_idx(memop, idx);
375
+ orig_oi = oi = make_memop_idx(memop, idx);
376
377
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
378
swap = tcg_temp_ebb_new_i64();
379
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
380
}
381
val = swap;
382
memop &= ~MO_BSWAP;
383
+ oi = make_memop_idx(memop, idx);
384
}
385
386
- gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
387
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
388
+ gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, oi);
389
+ plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
390
391
if (swap) {
392
tcg_temp_free_i64(swap);
393
}
394
}
395
396
+void tcg_gen_qemu_st_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
397
+ MemOp memop, TCGType addr_type)
398
+{
399
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
400
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
401
+ tcg_gen_qemu_st_i64_int(val, addr, idx, memop);
402
+}
403
+
404
/*
405
* Return true if @mop, without knowledge of the pointer alignment,
406
* does not require 16-byte atomicity, and it would be adventagous
407
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
408
{
409
MemOp mop_1 = orig, mop_2;
410
411
- tcg_debug_assert((orig & MO_SIZE) == MO_128);
412
- tcg_debug_assert((orig & MO_SIGN) == 0);
413
-
414
/* Reduce the size to 64-bit. */
415
mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
416
417
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
418
ret[1] = mop_2;
419
}
420
421
-#if TARGET_LONG_BITS == 64
422
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
423
-#else
424
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
425
-#endif
426
-
427
static TCGv_i64 maybe_extend_addr64(TCGv addr)
428
{
429
#if TARGET_LONG_BITS == 32
430
@@ -XXX,XX +XXX,XX @@ static void maybe_free_addr64(TCGv_i64 a64)
431
#endif
432
}
433
434
-void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
435
+static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
436
+ TCGArg idx, MemOp memop)
437
{
438
- const MemOpIdx oi = make_memop_idx(memop, idx);
439
-
440
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
441
- tcg_debug_assert((memop & MO_SIGN) == 0);
442
+ const MemOpIdx orig_oi = make_memop_idx(memop, idx);
443
+ TCGv_i64 ext_addr = NULL;
444
445
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
446
447
/* TODO: For now, force 32-bit hosts to use the helper. */
448
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
449
TCGv_i64 lo, hi;
450
- TCGArg addr_arg;
451
- MemOpIdx adj_oi;
452
bool need_bswap = false;
453
+ MemOpIdx oi = orig_oi;
454
455
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
456
lo = TCGV128_HIGH(val);
457
hi = TCGV128_LOW(val);
458
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
459
+ oi = make_memop_idx(memop & ~MO_BSWAP, idx);
460
need_bswap = true;
461
} else {
462
lo = TCGV128_LOW(val);
463
hi = TCGV128_HIGH(val);
464
- adj_oi = oi;
465
}
466
467
-#if TARGET_LONG_BITS == 32
468
- addr_arg = tcgv_i32_arg(addr);
469
-#else
470
- addr_arg = tcgv_i64_arg(addr);
471
-#endif
472
- tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
473
+ gen_ldst(INDEX_op_qemu_ld_i128, tcgv_i64_temp(lo),
474
+ tcgv_i64_temp(hi), addr, oi);
475
476
if (need_bswap) {
477
tcg_gen_bswap64_i64(lo, lo);
478
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
479
}
480
} else if (use_two_i64_for_i128(memop)) {
481
MemOp mop[2];
482
- TCGv addr_p8;
483
+ TCGTemp *addr_p8;
484
TCGv_i64 x, y;
485
+ MemOpIdx oi;
486
+ bool need_bswap;
487
488
canonicalize_memop_i128_as_i64(mop, memop);
489
+ need_bswap = (mop[0] ^ memop) & MO_BSWAP;
490
491
/*
492
* Since there are no global TCGv_i128, there is no visible state
493
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
494
y = TCGV128_LOW(val);
495
}
496
497
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
498
+ oi = make_memop_idx(mop[0], idx);
499
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, oi);
500
501
- if ((mop[0] ^ memop) & MO_BSWAP) {
502
+ if (need_bswap) {
503
tcg_gen_bswap64_i64(x, x);
504
}
505
506
- addr_p8 = tcg_temp_ebb_new();
507
- tcg_gen_addi_tl(addr_p8, addr, 8);
508
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
509
- tcg_temp_free(addr_p8);
510
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
511
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
512
+ tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
513
+ addr_p8 = tcgv_i32_temp(t);
514
+ } else {
515
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
516
+ tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
517
+ addr_p8 = tcgv_i64_temp(t);
518
+ }
519
520
- if ((mop[0] ^ memop) & MO_BSWAP) {
521
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, oi);
522
+ tcg_temp_free_internal(addr_p8);
523
+
524
+ if (need_bswap) {
525
tcg_gen_bswap64_i64(y, y);
526
}
527
} else {
528
- TCGv_i64 a64 = maybe_extend_addr64(addr);
529
- gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
530
- maybe_free_addr64(a64);
531
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
532
+ ext_addr = tcg_temp_ebb_new_i64();
533
+ tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
534
+ addr = tcgv_i64_temp(ext_addr);
535
+ }
536
+ gen_helper_ld_i128(val, cpu_env, temp_tcgv_i64(addr),
537
+ tcg_constant_i32(orig_oi));
538
}
539
540
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_R);
541
+ plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
542
}
543
544
-void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
545
+void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
546
+ MemOp memop, TCGType addr_type)
547
{
548
- const MemOpIdx oi = make_memop_idx(memop, idx);
549
-
550
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
551
tcg_debug_assert((memop & MO_SIZE) == MO_128);
552
tcg_debug_assert((memop & MO_SIGN) == 0);
553
+ tcg_gen_qemu_ld_i128_int(val, addr, idx, memop);
554
+}
555
+
556
+static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
557
+ TCGArg idx, MemOp memop)
558
+{
559
+ const MemOpIdx orig_oi = make_memop_idx(memop, idx);
560
+ TCGv_i64 ext_addr = NULL;
561
562
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
563
564
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
565
566
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
567
TCGv_i64 lo, hi;
568
- TCGArg addr_arg;
569
- MemOpIdx adj_oi;
570
+ MemOpIdx oi = orig_oi;
571
bool need_bswap = false;
572
573
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
574
- lo = tcg_temp_new_i64();
575
- hi = tcg_temp_new_i64();
576
+ lo = tcg_temp_ebb_new_i64();
577
+ hi = tcg_temp_ebb_new_i64();
578
tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
579
tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
580
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
581
+ oi = make_memop_idx(memop & ~MO_BSWAP, idx);
582
need_bswap = true;
583
} else {
584
lo = TCGV128_LOW(val);
585
hi = TCGV128_HIGH(val);
586
- adj_oi = oi;
587
}
588
589
-#if TARGET_LONG_BITS == 32
590
- addr_arg = tcgv_i32_arg(addr);
591
-#else
592
- addr_arg = tcgv_i64_arg(addr);
593
-#endif
594
- tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
595
+ gen_ldst(INDEX_op_qemu_st_i128, tcgv_i64_temp(lo),
596
+ tcgv_i64_temp(hi), addr, oi);
597
598
if (need_bswap) {
599
tcg_temp_free_i64(lo);
600
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
601
}
602
} else if (use_two_i64_for_i128(memop)) {
603
MemOp mop[2];
604
- TCGv addr_p8;
605
- TCGv_i64 x, y;
606
+ TCGTemp *addr_p8;
607
+ TCGv_i64 x, y, b = NULL;
608
609
canonicalize_memop_i128_as_i64(mop, memop);
610
611
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
612
y = TCGV128_LOW(val);
613
}
614
615
- addr_p8 = tcg_temp_ebb_new();
616
if ((mop[0] ^ memop) & MO_BSWAP) {
617
- TCGv_i64 t = tcg_temp_ebb_new_i64();
618
-
619
- tcg_gen_bswap64_i64(t, x);
620
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
621
- tcg_gen_bswap64_i64(t, y);
622
- tcg_gen_addi_tl(addr_p8, addr, 8);
623
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
624
- tcg_temp_free_i64(t);
625
- } else {
626
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
627
- tcg_gen_addi_tl(addr_p8, addr, 8);
628
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
629
+ b = tcg_temp_ebb_new_i64();
630
+ tcg_gen_bswap64_i64(b, x);
631
+ x = b;
632
}
633
- tcg_temp_free(addr_p8);
634
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr,
635
+ make_memop_idx(mop[0], idx));
636
+
637
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
638
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
639
+ tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
640
+ addr_p8 = tcgv_i32_temp(t);
641
+ } else {
642
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
643
+ tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
644
+ addr_p8 = tcgv_i64_temp(t);
645
+ }
646
+
647
+ if (b) {
648
+ tcg_gen_bswap64_i64(b, y);
649
+ y = b;
650
+ }
651
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8,
652
+ make_memop_idx(mop[1], idx));
653
+
654
+ if (b) {
655
+ tcg_temp_free_i64(b);
656
+ }
657
+ tcg_temp_free_internal(addr_p8);
658
} else {
659
- TCGv_i64 a64 = maybe_extend_addr64(addr);
660
- gen_helper_st_i128(cpu_env, a64, val, tcg_constant_i32(oi));
661
- maybe_free_addr64(a64);
662
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
663
+ ext_addr = tcg_temp_ebb_new_i64();
664
+ tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
665
+ addr = tcgv_i64_temp(ext_addr);
666
+ }
667
+ gen_helper_st_i128(cpu_env, temp_tcgv_i64(addr), val,
668
+ tcg_constant_i32(orig_oi));
669
}
670
671
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
672
+ plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_W);
673
+}
674
+
675
+void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
676
+ MemOp memop, TCGType addr_type)
677
+{
678
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
679
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
680
+ tcg_debug_assert((memop & MO_SIGN) == 0);
681
+ tcg_gen_qemu_st_i128_int(val, addr, idx, memop);
682
}
683
684
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
685
--
686
2.34.1
687
688
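The two-i64 fallback in the hunk above boils down to two 8-byte accesses, at addr and addr + 8, with each half byte-swapped when the backend cannot handle MO_BSWAP itself. Below is a minimal host-side sketch of just that decomposition; demo_i128, demo_ld16_le and host_is_be are invented for the illustration and are not QEMU APIs.

#include <stdint.h>
#include <string.h>

typedef struct { uint64_t lo, hi; } demo_i128;

/* Little-endian 16-byte load assembled from two 8-byte pieces. */
demo_i128 demo_ld16_le(const uint8_t *addr, int host_is_be)
{
    demo_i128 r;
    uint64_t x, y;

    memcpy(&x, addr, 8);        /* first half, at addr      */
    memcpy(&y, addr + 8, 8);    /* second half, at addr + 8 */
    if (host_is_be) {           /* the (mop[0] ^ memop) & MO_BSWAP case */
        x = __builtin_bswap64(x);
        y = __builtin_bswap64(y);
    }
    r.lo = x;
    r.hi = y;
    return r;
}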
New patch
1
Expand from TCGv to TCGTemp inline in the translators,
2
and validate that the size matches tcg_ctx->addr_type.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/tcg/tcg-op.h | 184 ++++++++++++++++++++++++++++++----------
8
tcg/tcg-op-ldst.c | 198 ++++++++++++++++++++++++++++---------------
9
2 files changed, 267 insertions(+), 115 deletions(-)
10
11
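To make the wrapper pattern described in the commit message a little more concrete, here is roughly what one DEF_ATOMIC2 instantiation from the header below expands to. This is hand-expanded and lightly reformatted, so treat it as a sketch of the pattern rather than literal preprocessor output; it relies on the QEMU declarations in tcg-op.h.

/* DEF_ATOMIC2(tcg_gen_atomic_xchg, i32) becomes, approximately: */
static inline void tcg_gen_atomic_xchg_i32(TCGv_i32 r, TCGv a, TCGv_i32 v,
                                           TCGArg i, MemOp m)
{
    /*
     * Expand TCGv to TCGTemp inline and record the translator's address
     * type; the _chk entry point asserts it matches tcg_ctx->addr_type.
     */
    tcg_gen_atomic_xchg_i32_chk(r, tcgv_tl_temp(a), v, i, m, TCG_TYPE_TL);
}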
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/include/tcg/tcg-op.h
14
+++ b/include/tcg/tcg-op.h
15
@@ -XXX,XX +XXX,XX @@ tcg_gen_qemu_st_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
16
tcg_gen_qemu_st_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
17
}
18
19
-void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
20
- TCGArg, MemOp);
21
-void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
22
- TCGArg, MemOp);
23
-void tcg_gen_atomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
24
- TCGArg, MemOp);
25
+void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGv_i32,
26
+ TCGArg, MemOp, TCGType);
27
+void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGv_i64,
28
+ TCGArg, MemOp, TCGType);
29
+void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128,
30
+ TCGv_i128, TCGArg, MemOp, TCGType);
31
32
-void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
33
- TCGArg, MemOp);
34
-void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
35
- TCGArg, MemOp);
36
-void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
37
- TCGArg, MemOp);
38
+void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGv_i32,
39
+ TCGArg, MemOp, TCGType);
40
+void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGv_i64,
41
+ TCGArg, MemOp, TCGType);
42
+void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128,
43
+ TCGv_i128, TCGArg, MemOp, TCGType);
44
45
-void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
46
-void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
47
+void tcg_gen_atomic_xchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
48
+ TCGArg, MemOp, TCGType);
49
+void tcg_gen_atomic_xchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
50
+ TCGArg, MemOp, TCGType);
51
52
-void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
53
-void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
54
-void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
55
-void tcg_gen_atomic_fetch_and_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
56
-void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
57
-void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
58
-void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
59
-void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
60
-void tcg_gen_atomic_fetch_smin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
61
-void tcg_gen_atomic_fetch_smin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
62
-void tcg_gen_atomic_fetch_umin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
63
-void tcg_gen_atomic_fetch_umin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
64
-void tcg_gen_atomic_fetch_smax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
65
-void tcg_gen_atomic_fetch_smax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
66
-void tcg_gen_atomic_fetch_umax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
67
-void tcg_gen_atomic_fetch_umax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
68
+void tcg_gen_atomic_fetch_add_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
69
+ TCGArg, MemOp, TCGType);
70
+void tcg_gen_atomic_fetch_add_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
71
+ TCGArg, MemOp, TCGType);
72
+void tcg_gen_atomic_fetch_and_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
73
+ TCGArg, MemOp, TCGType);
74
+void tcg_gen_atomic_fetch_and_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
75
+ TCGArg, MemOp, TCGType);
76
+void tcg_gen_atomic_fetch_or_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
77
+ TCGArg, MemOp, TCGType);
78
+void tcg_gen_atomic_fetch_or_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
79
+ TCGArg, MemOp, TCGType);
80
+void tcg_gen_atomic_fetch_xor_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
81
+ TCGArg, MemOp, TCGType);
82
+void tcg_gen_atomic_fetch_xor_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
83
+ TCGArg, MemOp, TCGType);
84
+void tcg_gen_atomic_fetch_smin_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
85
+ TCGArg, MemOp, TCGType);
86
+void tcg_gen_atomic_fetch_smin_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
87
+ TCGArg, MemOp, TCGType);
88
+void tcg_gen_atomic_fetch_umin_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
89
+ TCGArg, MemOp, TCGType);
90
+void tcg_gen_atomic_fetch_umin_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
91
+ TCGArg, MemOp, TCGType);
92
+void tcg_gen_atomic_fetch_smax_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
93
+ TCGArg, MemOp, TCGType);
94
+void tcg_gen_atomic_fetch_smax_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
95
+ TCGArg, MemOp, TCGType);
96
+void tcg_gen_atomic_fetch_umax_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
97
+ TCGArg, MemOp, TCGType);
98
+void tcg_gen_atomic_fetch_umax_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
99
+ TCGArg, MemOp, TCGType);
100
101
-void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
102
-void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
103
-void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
104
-void tcg_gen_atomic_and_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
105
-void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
106
-void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
107
-void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
108
-void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
109
-void tcg_gen_atomic_smin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
110
-void tcg_gen_atomic_smin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
111
-void tcg_gen_atomic_umin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
112
-void tcg_gen_atomic_umin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
113
-void tcg_gen_atomic_smax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
114
-void tcg_gen_atomic_smax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
115
-void tcg_gen_atomic_umax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
116
-void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
117
+void tcg_gen_atomic_add_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
118
+ TCGArg, MemOp, TCGType);
119
+void tcg_gen_atomic_add_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
120
+ TCGArg, MemOp, TCGType);
121
+void tcg_gen_atomic_and_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
122
+ TCGArg, MemOp, TCGType);
123
+void tcg_gen_atomic_and_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
124
+ TCGArg, MemOp, TCGType);
125
+void tcg_gen_atomic_or_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
126
+ TCGArg, MemOp, TCGType);
127
+void tcg_gen_atomic_or_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
128
+ TCGArg, MemOp, TCGType);
129
+void tcg_gen_atomic_xor_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
130
+ TCGArg, MemOp, TCGType);
131
+void tcg_gen_atomic_xor_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
132
+ TCGArg, MemOp, TCGType);
133
+void tcg_gen_atomic_smin_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
134
+ TCGArg, MemOp, TCGType);
135
+void tcg_gen_atomic_smin_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
136
+ TCGArg, MemOp, TCGType);
137
+void tcg_gen_atomic_umin_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
138
+ TCGArg, MemOp, TCGType);
139
+void tcg_gen_atomic_umin_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
140
+ TCGArg, MemOp, TCGType);
141
+void tcg_gen_atomic_smax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
142
+ TCGArg, MemOp, TCGType);
143
+void tcg_gen_atomic_smax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
144
+ TCGArg, MemOp, TCGType);
145
+void tcg_gen_atomic_umax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
146
+ TCGArg, MemOp, TCGType);
147
+void tcg_gen_atomic_umax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
148
+ TCGArg, MemOp, TCGType);
149
+
150
+#define DEF_ATOMIC2(N, S) \
151
+ static inline void N##_##S(TCGv_##S r, TCGv a, TCGv_##S v, \
152
+ TCGArg i, MemOp m) \
153
+ { N##_##S##_chk(r, tcgv_tl_temp(a), v, i, m, TCG_TYPE_TL); }
154
+
155
+#define DEF_ATOMIC3(N, S) \
156
+ static inline void N##_##S(TCGv_##S r, TCGv a, TCGv_##S o, \
157
+ TCGv_##S n, TCGArg i, MemOp m) \
158
+ { N##_##S##_chk(r, tcgv_tl_temp(a), o, n, i, m, TCG_TYPE_TL); }
159
+
160
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i32)
161
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i64)
162
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i128)
163
+
164
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i32)
165
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i64)
166
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i128)
167
+
168
+DEF_ATOMIC2(tcg_gen_atomic_xchg, i32)
169
+DEF_ATOMIC2(tcg_gen_atomic_xchg, i64)
170
+
171
+DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i32)
172
+DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i64)
173
+DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i32)
174
+DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i64)
175
+DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i32)
176
+DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i64)
177
+DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i32)
178
+DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i64)
179
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i32)
180
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i64)
181
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umin, i32)
182
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umin, i64)
183
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smax, i32)
184
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smax, i64)
185
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umax, i32)
186
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umax, i64)
187
+
188
+DEF_ATOMIC2(tcg_gen_atomic_add_fetch, i32)
189
+DEF_ATOMIC2(tcg_gen_atomic_add_fetch, i64)
190
+DEF_ATOMIC2(tcg_gen_atomic_and_fetch, i32)
191
+DEF_ATOMIC2(tcg_gen_atomic_and_fetch, i64)
192
+DEF_ATOMIC2(tcg_gen_atomic_or_fetch, i32)
193
+DEF_ATOMIC2(tcg_gen_atomic_or_fetch, i64)
194
+DEF_ATOMIC2(tcg_gen_atomic_xor_fetch, i32)
195
+DEF_ATOMIC2(tcg_gen_atomic_xor_fetch, i64)
196
+DEF_ATOMIC2(tcg_gen_atomic_smin_fetch, i32)
197
+DEF_ATOMIC2(tcg_gen_atomic_smin_fetch, i64)
198
+DEF_ATOMIC2(tcg_gen_atomic_umin_fetch, i32)
199
+DEF_ATOMIC2(tcg_gen_atomic_umin_fetch, i64)
200
+DEF_ATOMIC2(tcg_gen_atomic_smax_fetch, i32)
201
+DEF_ATOMIC2(tcg_gen_atomic_smax_fetch, i64)
202
+DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i32)
203
+DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
204
+
205
+#undef DEF_ATOMIC2
206
+#undef DEF_ATOMIC3
207
208
void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
209
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
210
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
211
index XXXXXXX..XXXXXXX 100644
212
--- a/tcg/tcg-op-ldst.c
213
+++ b/tcg/tcg-op-ldst.c
214
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
215
ret[1] = mop_2;
216
}
217
218
-static TCGv_i64 maybe_extend_addr64(TCGv addr)
219
+static TCGv_i64 maybe_extend_addr64(TCGTemp *addr)
220
{
221
-#if TARGET_LONG_BITS == 32
222
- TCGv_i64 a64 = tcg_temp_ebb_new_i64();
223
- tcg_gen_extu_i32_i64(a64, addr);
224
- return a64;
225
-#else
226
- return addr;
227
-#endif
228
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
229
+ TCGv_i64 a64 = tcg_temp_ebb_new_i64();
230
+ tcg_gen_extu_i32_i64(a64, temp_tcgv_i32(addr));
231
+ return a64;
232
+ }
233
+ return temp_tcgv_i64(addr);
234
}
235
236
static void maybe_free_addr64(TCGv_i64 a64)
237
{
238
-#if TARGET_LONG_BITS == 32
239
- tcg_temp_free_i64(a64);
240
-#endif
241
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
242
+ tcg_temp_free_i64(a64);
243
+ }
244
}
245
246
static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
247
@@ -XXX,XX +XXX,XX @@ static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
248
WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
249
};
250
251
-void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
252
- TCGv_i32 newv, TCGArg idx, MemOp memop)
253
+static void tcg_gen_nonatomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
254
+ TCGv_i32 cmpv, TCGv_i32 newv,
255
+ TCGArg idx, MemOp memop)
256
{
257
TCGv_i32 t1 = tcg_temp_ebb_new_i32();
258
TCGv_i32 t2 = tcg_temp_ebb_new_i32();
259
260
tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
261
262
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
263
+ tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop & ~MO_SIGN);
264
tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
265
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
266
+ tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
267
tcg_temp_free_i32(t2);
268
269
if (memop & MO_SIGN) {
270
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
271
tcg_temp_free_i32(t1);
272
}
273
274
-void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
275
- TCGv_i32 newv, TCGArg idx, MemOp memop)
276
+void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
277
+ TCGv_i32 cmpv, TCGv_i32 newv,
278
+ TCGArg idx, MemOp memop,
279
+ TCGType addr_type)
280
+{
281
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
282
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
283
+ tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
284
+}
285
+
286
+static void tcg_gen_atomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
287
+ TCGv_i32 cmpv, TCGv_i32 newv,
288
+ TCGArg idx, MemOp memop)
289
{
290
gen_atomic_cx_i32 gen;
291
TCGv_i64 a64;
292
MemOpIdx oi;
293
294
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
295
- tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
296
+ tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
297
return;
298
}
299
300
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
301
}
302
}
303
304
-void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
305
- TCGv_i64 newv, TCGArg idx, MemOp memop)
306
+void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
307
+ TCGv_i32 cmpv, TCGv_i32 newv,
308
+ TCGArg idx, MemOp memop,
309
+ TCGType addr_type)
310
+{
311
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
312
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
313
+ tcg_gen_atomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
314
+}
315
+
316
+static void tcg_gen_nonatomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
317
+ TCGv_i64 cmpv, TCGv_i64 newv,
318
+ TCGArg idx, MemOp memop)
319
{
320
TCGv_i64 t1, t2;
321
322
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
323
- tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
324
- TCGV_LOW(newv), idx, memop);
325
+ tcg_gen_nonatomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
326
+ TCGV_LOW(newv), idx, memop);
327
if (memop & MO_SIGN) {
328
tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
329
} else {
330
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
331
332
tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
333
334
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
335
+ tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop & ~MO_SIGN);
336
tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
337
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
338
+ tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
339
tcg_temp_free_i64(t2);
340
341
if (memop & MO_SIGN) {
342
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
343
tcg_temp_free_i64(t1);
344
}
345
346
-void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
347
- TCGv_i64 newv, TCGArg idx, MemOp memop)
348
+void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
349
+ TCGv_i64 cmpv, TCGv_i64 newv,
350
+ TCGArg idx, MemOp memop,
351
+ TCGType addr_type)
352
+{
353
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
354
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
355
+ tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
356
+}
357
+
358
+static void tcg_gen_atomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
359
+ TCGv_i64 cmpv, TCGv_i64 newv,
360
+ TCGArg idx, MemOp memop)
361
{
362
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
363
- tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
364
+ tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
365
return;
366
}
367
368
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
369
}
370
371
if (TCG_TARGET_REG_BITS == 32) {
372
- tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
373
- TCGV_LOW(newv), idx, memop);
374
+ tcg_gen_atomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
375
+ TCGV_LOW(newv), idx, memop);
376
if (memop & MO_SIGN) {
377
tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
378
} else {
379
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
380
381
tcg_gen_extrl_i64_i32(c32, cmpv);
382
tcg_gen_extrl_i64_i32(n32, newv);
383
- tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
384
+ tcg_gen_atomic_cmpxchg_i32_int(r32, addr, c32, n32,
385
+ idx, memop & ~MO_SIGN);
386
tcg_temp_free_i32(c32);
387
tcg_temp_free_i32(n32);
388
389
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
390
}
391
}
392
393
-void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
394
- TCGv_i128 newv, TCGArg idx, MemOp memop)
395
+void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
396
+ TCGv_i64 cmpv, TCGv_i64 newv,
397
+ TCGArg idx, MemOp memop, TCGType addr_type)
398
+{
399
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
400
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
401
+ tcg_gen_atomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
402
+}
403
+
404
+static void tcg_gen_nonatomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
405
+ TCGv_i128 cmpv, TCGv_i128 newv,
406
+ TCGArg idx, MemOp memop)
407
{
408
if (TCG_TARGET_REG_BITS == 32) {
409
/* Inline expansion below is simply too large for 32-bit hosts. */
410
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
411
? gen_helper_nonatomic_cmpxchgo_le
412
: gen_helper_nonatomic_cmpxchgo_be);
413
MemOpIdx oi = make_memop_idx(memop, idx);
414
- TCGv_i64 a64;
415
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
416
417
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
418
- tcg_debug_assert((memop & MO_SIGN) == 0);
419
-
420
- a64 = maybe_extend_addr64(addr);
421
gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
422
maybe_free_addr64(a64);
423
} else {
424
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
425
TCGv_i64 t1 = tcg_temp_ebb_new_i64();
426
TCGv_i64 z = tcg_constant_i64(0);
427
428
- tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
429
+ tcg_gen_qemu_ld_i128_int(oldv, addr, idx, memop);
430
431
/* Compare i128 */
432
tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
433
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
434
TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
435
436
/* Unconditional writeback. */
437
- tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
438
+ tcg_gen_qemu_st_i128_int(tmpv, addr, idx, memop);
439
tcg_gen_mov_i128(retv, oldv);
440
441
tcg_temp_free_i64(t0);
442
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
443
}
444
}
445
446
-void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
447
- TCGv_i128 newv, TCGArg idx, MemOp memop)
448
+void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
449
+ TCGv_i128 cmpv, TCGv_i128 newv,
450
+ TCGArg idx, MemOp memop,
451
+ TCGType addr_type)
452
+{
453
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
454
+ tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
455
+ tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
456
+}
457
+
458
+static void tcg_gen_atomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
459
+ TCGv_i128 cmpv, TCGv_i128 newv,
460
+ TCGArg idx, MemOp memop)
461
{
462
gen_atomic_cx_i128 gen;
463
464
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
465
- tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
466
+ tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
467
return;
468
}
469
470
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
471
- tcg_debug_assert((memop & MO_SIGN) == 0);
472
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
473
-
474
if (gen) {
475
MemOpIdx oi = make_memop_idx(memop, idx);
476
TCGv_i64 a64 = maybe_extend_addr64(addr);
477
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
478
tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
479
}
480
481
-static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
482
+void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
483
+ TCGv_i128 cmpv, TCGv_i128 newv,
484
+ TCGArg idx, MemOp memop,
485
+ TCGType addr_type)
486
+{
487
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
488
+ tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
489
+ tcg_gen_atomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
490
+}
491
+
492
+static void do_nonatomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
493
TCGArg idx, MemOp memop, bool new_val,
494
void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
495
{
496
@@ -XXX,XX +XXX,XX @@ static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
497
498
memop = tcg_canonicalize_memop(memop, 0, 0);
499
500
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
501
+ tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop);
502
tcg_gen_ext_i32(t2, val, memop);
503
gen(t2, t1, t2);
504
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
505
+ tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
506
507
tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
508
tcg_temp_free_i32(t1);
509
tcg_temp_free_i32(t2);
510
}
511
512
-static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
513
+static void do_atomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
514
TCGArg idx, MemOp memop, void * const table[])
515
{
516
gen_atomic_op_i32 gen;
517
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
518
}
519
}
520
521
-static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
522
+static void do_nonatomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
523
TCGArg idx, MemOp memop, bool new_val,
524
void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
525
{
526
@@ -XXX,XX +XXX,XX @@ static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
527
528
memop = tcg_canonicalize_memop(memop, 1, 0);
529
530
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
531
+ tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop);
532
tcg_gen_ext_i64(t2, val, memop);
533
gen(t2, t1, t2);
534
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
535
+ tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
536
537
tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
538
tcg_temp_free_i64(t1);
539
tcg_temp_free_i64(t2);
540
}
541
542
-static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
543
+static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
544
TCGArg idx, MemOp memop, void * const table[])
545
{
546
memop = tcg_canonicalize_memop(memop, 1, 0);
547
548
if ((memop & MO_SIZE) == MO_64) {
549
-#ifdef CONFIG_ATOMIC64
550
- gen_atomic_op_i64 gen;
551
- TCGv_i64 a64;
552
- MemOpIdx oi;
553
+ gen_atomic_op_i64 gen = table[memop & (MO_SIZE | MO_BSWAP)];
554
555
- gen = table[memop & (MO_SIZE | MO_BSWAP)];
556
- tcg_debug_assert(gen != NULL);
557
+ if (gen) {
558
+ MemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
559
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
560
+ gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
561
+ maybe_free_addr64(a64);
562
+ return;
563
+ }
564
565
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
566
- a64 = maybe_extend_addr64(addr);
567
- gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
568
- maybe_free_addr64(a64);
569
-#else
570
gen_helper_exit_atomic(cpu_env);
571
/* Produce a result, so that we have a well-formed opcode stream
572
with respect to uses of the result in the (dead) code following. */
573
tcg_gen_movi_i64(ret, 0);
574
-#endif /* CONFIG_ATOMIC64 */
575
} else {
576
TCGv_i32 v32 = tcg_temp_ebb_new_i32();
577
TCGv_i32 r32 = tcg_temp_ebb_new_i32();
578
@@ -XXX,XX +XXX,XX @@ static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
579
WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
580
WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
581
}; \
582
-void tcg_gen_atomic_##NAME##_i32 \
583
- (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
584
+void tcg_gen_atomic_##NAME##_i32_chk(TCGv_i32 ret, TCGTemp *addr, \
585
+ TCGv_i32 val, TCGArg idx, \
586
+ MemOp memop, TCGType addr_type) \
587
{ \
588
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type); \
589
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32); \
590
if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
591
do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
592
} else { \
593
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_##NAME##_i32 \
594
tcg_gen_##OP##_i32); \
595
} \
596
} \
597
-void tcg_gen_atomic_##NAME##_i64 \
598
- (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
599
+void tcg_gen_atomic_##NAME##_i64_chk(TCGv_i64 ret, TCGTemp *addr, \
600
+ TCGv_i64 val, TCGArg idx, \
601
+ MemOp memop, TCGType addr_type) \
602
{ \
603
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type); \
604
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64); \
605
if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
606
do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
607
} else { \
608
--
609
2.34.1
610
611
New patch
1
For 32-bit hosts, we cannot simply rely on TCGContext.addr_bits,
2
as we need one or two host registers to represent the guest address.
1
3
4
Create the new opcodes and update all users. Since we have not
5
yet eliminated TARGET_LONG_BITS, only one of the two opcodes will
6
ever be used, so we can get away with treating them the same in
7
the backends.
8
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
include/tcg/tcg-opc.h | 35 ++++++++----
13
tcg/optimize.c | 19 +++++--
14
tcg/tcg-op-ldst.c | 83 ++++++++++++++++++++++-------
15
tcg/tcg.c | 42 ++++++++++-----
16
tcg/tci.c | 32 +++++++----
17
tcg/aarch64/tcg-target.c.inc | 36 ++++++++-----
18
tcg/arm/tcg-target.c.inc | 83 +++++++++++++++--------------
19
tcg/i386/tcg-target.c.inc | 91 ++++++++++++++++++++------------
20
tcg/loongarch64/tcg-target.c.inc | 24 ++++++---
21
tcg/mips/tcg-target.c.inc | 66 ++++++++++++++---------
22
tcg/ppc/tcg-target.c.inc | 91 +++++++++++++++++++-------------
23
tcg/riscv/tcg-target.c.inc | 24 ++++++---
24
tcg/s390x/tcg-target.c.inc | 36 ++++++++-----
25
tcg/sparc64/tcg-target.c.inc | 24 ++++++---
26
tcg/tci/tcg-target.c.inc | 44 ++++++++-------
27
15 files changed, 468 insertions(+), 262 deletions(-)
28
29
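One way to see the point about needing one or two host registers for the guest address: once the address width is baked into the opcode, the number of input words an opcode consumes depends only on the opcode and the host register width, not on TARGET_LONG_BITS. The throwaway program below mirrors the DATA64_ARGS rule used in tcg-opc.h for the address operand; addr_words is made up for the example and is not QEMU code.

#include <stdio.h>

/*
 * Host words needed for a guest address: 1 if it fits in a register,
 * otherwise 64 / host_reg_bits (the DATA64_ARGS case in tcg-opc.h).
 */
static int addr_words(int addr_bits, int host_reg_bits)
{
    return addr_bits <= host_reg_bits ? 1 : 64 / host_reg_bits;
}

int main(void)
{
    printf("qemu_ld_a32_* on a 32-bit host: %d address word(s)\n",
           addr_words(32, 32));
    printf("qemu_ld_a64_* on a 32-bit host: %d address word(s)\n",
           addr_words(64, 32));
    printf("qemu_ld_a64_* on a 64-bit host: %d address word(s)\n",
           addr_words(64, 64));
    return 0;
}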
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/include/tcg/tcg-opc.h
32
+++ b/include/tcg/tcg-opc.h
33
@@ -XXX,XX +XXX,XX @@ DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
34
DEF(muluh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muluh_i64))
35
DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
36
37
-#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
38
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
39
40
/* QEMU specific */
41
@@ -XXX,XX +XXX,XX @@ DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
42
DEF(plugin_cb_start, 0, 0, 3, TCG_OPF_NOT_PRESENT)
43
DEF(plugin_cb_end, 0, 0, 0, TCG_OPF_NOT_PRESENT)
44
45
-DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
46
+/* Replicate ld/st ops for 32 and 64-bit guest addresses. */
47
+DEF(qemu_ld_a32_i32, 1, 1, 1,
48
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
49
-DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1,
50
+DEF(qemu_st_a32_i32, 0, 1 + 1, 1,
51
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
52
-DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
53
+DEF(qemu_ld_a32_i64, DATA64_ARGS, 1, 1,
54
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
55
-DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
56
+DEF(qemu_st_a32_i64, 0, DATA64_ARGS + 1, 1,
57
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
58
+
59
+DEF(qemu_ld_a64_i32, 1, DATA64_ARGS, 1,
60
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
61
+DEF(qemu_st_a64_i32, 0, 1 + DATA64_ARGS, 1,
62
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
63
+DEF(qemu_ld_a64_i64, DATA64_ARGS, DATA64_ARGS, 1,
64
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
65
+DEF(qemu_st_a64_i64, 0, DATA64_ARGS + DATA64_ARGS, 1,
66
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
67
68
/* Only used by i386 to cope with stupid register constraints. */
69
-DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
70
+DEF(qemu_st8_a32_i32, 0, 1 + 1, 1,
71
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
72
+ IMPL(TCG_TARGET_HAS_qemu_st8_i32))
73
+DEF(qemu_st8_a64_i32, 0, 1 + DATA64_ARGS, 1,
74
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
75
IMPL(TCG_TARGET_HAS_qemu_st8_i32))
76
77
/* Only for 64-bit hosts at the moment. */
78
-DEF(qemu_ld_i128, 2, 1, 1,
79
+DEF(qemu_ld_a32_i128, 2, 1, 1,
80
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
81
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
82
-DEF(qemu_st_i128, 0, 3, 1,
83
+DEF(qemu_ld_a64_i128, 2, 1, 1,
84
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
85
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
86
+DEF(qemu_st_a32_i128, 0, 3, 1,
87
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
88
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
89
+DEF(qemu_st_a64_i128, 0, 3, 1,
90
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
91
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
92
93
@@ -XXX,XX +XXX,XX @@ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
94
DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
95
#endif
96
97
-#undef TLADDR_ARGS
98
#undef DATA64_ARGS
99
#undef IMPL
100
#undef IMPL64
101
diff --git a/tcg/optimize.c b/tcg/optimize.c
102
index XXXXXXX..XXXXXXX 100644
103
--- a/tcg/optimize.c
104
+++ b/tcg/optimize.c
105
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
106
CASE_OP_32_64_VEC(orc):
107
done = fold_orc(&ctx, op);
108
break;
109
- case INDEX_op_qemu_ld_i32:
110
- case INDEX_op_qemu_ld_i64:
111
+ case INDEX_op_qemu_ld_a32_i32:
112
+ case INDEX_op_qemu_ld_a64_i32:
113
+ case INDEX_op_qemu_ld_a32_i64:
114
+ case INDEX_op_qemu_ld_a64_i64:
115
+ case INDEX_op_qemu_ld_a32_i128:
116
+ case INDEX_op_qemu_ld_a64_i128:
117
done = fold_qemu_ld(&ctx, op);
118
break;
119
- case INDEX_op_qemu_st_i32:
120
- case INDEX_op_qemu_st8_i32:
121
- case INDEX_op_qemu_st_i64:
122
+ case INDEX_op_qemu_st8_a32_i32:
123
+ case INDEX_op_qemu_st8_a64_i32:
124
+ case INDEX_op_qemu_st_a32_i32:
125
+ case INDEX_op_qemu_st_a64_i32:
126
+ case INDEX_op_qemu_st_a32_i64:
127
+ case INDEX_op_qemu_st_a64_i64:
128
+ case INDEX_op_qemu_st_a32_i128:
129
+ case INDEX_op_qemu_st_a64_i128:
130
done = fold_qemu_st(&ctx, op);
131
break;
132
CASE_OP_32_64(rem):
133
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/tcg/tcg-op-ldst.c
136
+++ b/tcg/tcg-op-ldst.c
137
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
138
MemOp orig_memop;
139
MemOpIdx orig_oi, oi;
140
TCGv_i64 copy_addr;
141
+ TCGOpcode opc;
142
143
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
144
orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
145
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
146
}
147
148
copy_addr = plugin_maybe_preserve_addr(addr);
149
- gen_ldst(INDEX_op_qemu_ld_i32, tcgv_i32_temp(val), NULL, addr, oi);
150
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
151
+ opc = INDEX_op_qemu_ld_a32_i32;
152
+ } else {
153
+ opc = INDEX_op_qemu_ld_a64_i32;
154
+ }
155
+ gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
156
plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
157
158
if ((orig_memop ^ memop) & MO_BSWAP) {
159
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
160
}
161
162
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
163
- opc = INDEX_op_qemu_st8_i32;
164
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
165
+ opc = INDEX_op_qemu_st8_a32_i32;
166
+ } else {
167
+ opc = INDEX_op_qemu_st8_a64_i32;
168
+ }
169
} else {
170
- opc = INDEX_op_qemu_st_i32;
171
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
172
+ opc = INDEX_op_qemu_st_a32_i32;
173
+ } else {
174
+ opc = INDEX_op_qemu_st_a64_i32;
175
+ }
176
}
177
gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
178
plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
179
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
180
MemOp orig_memop;
181
MemOpIdx orig_oi, oi;
182
TCGv_i64 copy_addr;
183
+ TCGOpcode opc;
184
185
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
186
tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
187
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
188
}
189
190
copy_addr = plugin_maybe_preserve_addr(addr);
191
- gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, oi);
192
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
193
+ opc = INDEX_op_qemu_ld_a32_i64;
194
+ } else {
195
+ opc = INDEX_op_qemu_ld_a64_i64;
196
+ }
197
+ gen_ldst_i64(opc, val, addr, oi);
198
plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
199
200
if ((orig_memop ^ memop) & MO_BSWAP) {
201
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
202
{
203
TCGv_i64 swap = NULL;
204
MemOpIdx orig_oi, oi;
205
+ TCGOpcode opc;
206
207
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
208
tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
209
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
210
oi = make_memop_idx(memop, idx);
211
}
212
213
- gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, oi);
214
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
215
+ opc = INDEX_op_qemu_st_a32_i64;
216
+ } else {
217
+ opc = INDEX_op_qemu_st_a64_i64;
218
+ }
219
+ gen_ldst_i64(opc, val, addr, oi);
220
plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
221
222
if (swap) {
223
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
224
{
225
const MemOpIdx orig_oi = make_memop_idx(memop, idx);
226
TCGv_i64 ext_addr = NULL;
227
+ TCGOpcode opc;
228
229
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
230
231
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
232
hi = TCGV128_HIGH(val);
233
}
234
235
- gen_ldst(INDEX_op_qemu_ld_i128, tcgv_i64_temp(lo),
236
- tcgv_i64_temp(hi), addr, oi);
237
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
238
+ opc = INDEX_op_qemu_ld_a32_i128;
239
+ } else {
240
+ opc = INDEX_op_qemu_ld_a64_i128;
241
+ }
242
+ gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
243
244
if (need_bswap) {
245
tcg_gen_bswap64_i64(lo, lo);
246
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
247
canonicalize_memop_i128_as_i64(mop, memop);
248
need_bswap = (mop[0] ^ memop) & MO_BSWAP;
249
250
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
251
+ opc = INDEX_op_qemu_ld_a32_i64;
252
+ } else {
253
+ opc = INDEX_op_qemu_ld_a64_i64;
254
+ }
255
+
256
/*
257
* Since there are no global TCGv_i128, there is no visible state
258
* changed if the second load faults. Load directly into the two
259
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
260
}
261
262
oi = make_memop_idx(mop[0], idx);
263
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, oi);
264
+ gen_ldst_i64(opc, x, addr, oi);
265
266
if (need_bswap) {
267
tcg_gen_bswap64_i64(x, x);
268
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
269
addr_p8 = tcgv_i64_temp(t);
270
}
271
272
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, oi);
273
+ gen_ldst_i64(opc, y, addr_p8, oi);
274
tcg_temp_free_internal(addr_p8);
275
276
if (need_bswap) {
277
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
278
{
279
const MemOpIdx orig_oi = make_memop_idx(memop, idx);
280
TCGv_i64 ext_addr = NULL;
281
+ TCGOpcode opc;
282
283
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
284
285
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
286
hi = TCGV128_HIGH(val);
287
}
288
289
- gen_ldst(INDEX_op_qemu_st_i128, tcgv_i64_temp(lo),
290
- tcgv_i64_temp(hi), addr, oi);
291
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
292
+ opc = INDEX_op_qemu_st_a32_i128;
293
+ } else {
294
+ opc = INDEX_op_qemu_st_a64_i128;
295
+ }
296
+ gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
297
298
if (need_bswap) {
299
tcg_temp_free_i64(lo);
300
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
301
302
canonicalize_memop_i128_as_i64(mop, memop);
303
304
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
305
+ opc = INDEX_op_qemu_st_a32_i64;
306
+ } else {
307
+ opc = INDEX_op_qemu_st_a64_i64;
308
+ }
309
+
310
if ((memop & MO_BSWAP) == MO_LE) {
311
x = TCGV128_LOW(val);
312
y = TCGV128_HIGH(val);
313
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
314
tcg_gen_bswap64_i64(b, x);
315
x = b;
316
}
317
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr,
318
- make_memop_idx(mop[0], idx));
319
+
320
+ gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));
321
322
if (tcg_ctx->addr_type == TCG_TYPE_I32) {
323
TCGv_i32 t = tcg_temp_ebb_new_i32();
324
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
325
326
if (b) {
327
tcg_gen_bswap64_i64(b, y);
328
- y = b;
329
- }
330
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8,
331
- make_memop_idx(mop[1], idx));
332
-
333
- if (b) {
334
+ gen_ldst_i64(opc, b, addr_p8, make_memop_idx(mop[1], idx));
335
tcg_temp_free_i64(b);
336
+ } else {
337
+ gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
338
}
339
tcg_temp_free_internal(addr_p8);
340
} else {
341
diff --git a/tcg/tcg.c b/tcg/tcg.c
342
index XXXXXXX..XXXXXXX 100644
343
--- a/tcg/tcg.c
344
+++ b/tcg/tcg.c
345
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
346
case INDEX_op_exit_tb:
347
case INDEX_op_goto_tb:
348
case INDEX_op_goto_ptr:
349
- case INDEX_op_qemu_ld_i32:
350
- case INDEX_op_qemu_st_i32:
351
- case INDEX_op_qemu_ld_i64:
352
- case INDEX_op_qemu_st_i64:
353
+ case INDEX_op_qemu_ld_a32_i32:
354
+ case INDEX_op_qemu_ld_a64_i32:
355
+ case INDEX_op_qemu_st_a32_i32:
356
+ case INDEX_op_qemu_st_a64_i32:
357
+ case INDEX_op_qemu_ld_a32_i64:
358
+ case INDEX_op_qemu_ld_a64_i64:
359
+ case INDEX_op_qemu_st_a32_i64:
360
+ case INDEX_op_qemu_st_a64_i64:
361
return true;
362
363
- case INDEX_op_qemu_st8_i32:
364
+ case INDEX_op_qemu_st8_a32_i32:
365
+ case INDEX_op_qemu_st8_a64_i32:
366
return TCG_TARGET_HAS_qemu_st8_i32;
367
368
- case INDEX_op_qemu_ld_i128:
369
- case INDEX_op_qemu_st_i128:
370
+ case INDEX_op_qemu_ld_a32_i128:
371
+ case INDEX_op_qemu_ld_a64_i128:
372
+ case INDEX_op_qemu_st_a32_i128:
373
+ case INDEX_op_qemu_st_a64_i128:
374
return TCG_TARGET_HAS_qemu_ldst_i128;
375
376
case INDEX_op_mov_i32:
377
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
378
}
379
i = 1;
380
break;
381
- case INDEX_op_qemu_ld_i32:
382
- case INDEX_op_qemu_st_i32:
383
- case INDEX_op_qemu_st8_i32:
384
- case INDEX_op_qemu_ld_i64:
385
- case INDEX_op_qemu_st_i64:
386
- case INDEX_op_qemu_ld_i128:
387
- case INDEX_op_qemu_st_i128:
388
+ case INDEX_op_qemu_ld_a32_i32:
389
+ case INDEX_op_qemu_ld_a64_i32:
390
+ case INDEX_op_qemu_st_a32_i32:
391
+ case INDEX_op_qemu_st_a64_i32:
392
+ case INDEX_op_qemu_st8_a32_i32:
393
+ case INDEX_op_qemu_st8_a64_i32:
394
+ case INDEX_op_qemu_ld_a32_i64:
395
+ case INDEX_op_qemu_ld_a64_i64:
396
+ case INDEX_op_qemu_st_a32_i64:
397
+ case INDEX_op_qemu_st_a64_i64:
398
+ case INDEX_op_qemu_ld_a32_i128:
399
+ case INDEX_op_qemu_ld_a64_i128:
400
+ case INDEX_op_qemu_st_a32_i128:
401
+ case INDEX_op_qemu_st_a64_i128:
402
{
403
const char *s_al, *s_op, *s_at;
404
MemOpIdx oi = op->args[k++];
405
diff --git a/tcg/tci.c b/tcg/tci.c
406
index XXXXXXX..XXXXXXX 100644
407
--- a/tcg/tci.c
408
+++ b/tcg/tci.c
409
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
410
tb_ptr = ptr;
411
break;
412
413
- case INDEX_op_qemu_ld_i32:
414
+ case INDEX_op_qemu_ld_a32_i32:
415
+ case INDEX_op_qemu_ld_a64_i32:
416
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
417
tci_args_rrm(insn, &r0, &r1, &oi);
418
taddr = regs[r1];
419
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
420
regs[r0] = tmp32;
421
break;
422
423
- case INDEX_op_qemu_ld_i64:
424
+ case INDEX_op_qemu_ld_a32_i64:
425
+ case INDEX_op_qemu_ld_a64_i64:
426
if (TCG_TARGET_REG_BITS == 64) {
427
tci_args_rrm(insn, &r0, &r1, &oi);
428
taddr = regs[r1];
429
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
430
}
431
break;
432
433
- case INDEX_op_qemu_st_i32:
434
+ case INDEX_op_qemu_st_a32_i32:
435
+ case INDEX_op_qemu_st_a64_i32:
436
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
437
tci_args_rrm(insn, &r0, &r1, &oi);
438
taddr = regs[r1];
439
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
440
tci_qemu_st(env, taddr, tmp32, oi, tb_ptr);
441
break;
442
443
- case INDEX_op_qemu_st_i64:
444
+ case INDEX_op_qemu_st_a32_i64:
445
+ case INDEX_op_qemu_st_a64_i64:
446
if (TCG_TARGET_REG_BITS == 64) {
447
tci_args_rrm(insn, &r0, &r1, &oi);
448
taddr = regs[r1];
449
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
450
str_r(r3), str_r(r4), str_r(r5));
451
break;
452
453
- case INDEX_op_qemu_ld_i64:
454
- case INDEX_op_qemu_st_i64:
455
- len = DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
456
+ case INDEX_op_qemu_ld_a32_i32:
457
+ case INDEX_op_qemu_st_a32_i32:
458
+ len = 1 + 1;
459
+ goto do_qemu_ldst;
460
+ case INDEX_op_qemu_ld_a32_i64:
461
+ case INDEX_op_qemu_st_a32_i64:
462
+ case INDEX_op_qemu_ld_a64_i32:
463
+ case INDEX_op_qemu_st_a64_i32:
464
+ len = 1 + DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
465
+ goto do_qemu_ldst;
466
+ case INDEX_op_qemu_ld_a64_i64:
467
+ case INDEX_op_qemu_st_a64_i64:
468
+ len = 2 * DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
469
goto do_qemu_ldst;
470
- case INDEX_op_qemu_ld_i32:
471
- case INDEX_op_qemu_st_i32:
472
- len = 1;
473
do_qemu_ldst:
474
- len += DIV_ROUND_UP(TARGET_LONG_BITS, TCG_TARGET_REG_BITS);
475
switch (len) {
476
case 2:
477
tci_args_rrm(insn, &r0, &r1, &oi);
478
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
479
index XXXXXXX..XXXXXXX 100644
480
--- a/tcg/aarch64/tcg-target.c.inc
481
+++ b/tcg/aarch64/tcg-target.c.inc
482
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
483
tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
484
break;
485
486
- case INDEX_op_qemu_ld_i32:
487
- case INDEX_op_qemu_ld_i64:
488
+ case INDEX_op_qemu_ld_a32_i32:
489
+ case INDEX_op_qemu_ld_a64_i32:
490
+ case INDEX_op_qemu_ld_a32_i64:
491
+ case INDEX_op_qemu_ld_a64_i64:
492
tcg_out_qemu_ld(s, a0, a1, a2, ext);
493
break;
494
- case INDEX_op_qemu_st_i32:
495
- case INDEX_op_qemu_st_i64:
496
+ case INDEX_op_qemu_st_a32_i32:
497
+ case INDEX_op_qemu_st_a64_i32:
498
+ case INDEX_op_qemu_st_a32_i64:
499
+ case INDEX_op_qemu_st_a64_i64:
500
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
501
break;
502
- case INDEX_op_qemu_ld_i128:
503
+ case INDEX_op_qemu_ld_a32_i128:
504
+ case INDEX_op_qemu_ld_a64_i128:
505
tcg_out_qemu_ld128(s, a0, a1, a2, args[3]);
506
break;
507
- case INDEX_op_qemu_st_i128:
508
+ case INDEX_op_qemu_st_a32_i128:
509
+ case INDEX_op_qemu_st_a64_i128:
510
tcg_out_qemu_st128(s, REG0(0), REG0(1), a2, args[3]);
511
break;
512
513
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
514
case INDEX_op_movcond_i64:
515
return C_O1_I4(r, r, rA, rZ, rZ);
516
517
- case INDEX_op_qemu_ld_i32:
518
- case INDEX_op_qemu_ld_i64:
519
+ case INDEX_op_qemu_ld_a32_i32:
520
+ case INDEX_op_qemu_ld_a64_i32:
521
+ case INDEX_op_qemu_ld_a32_i64:
522
+ case INDEX_op_qemu_ld_a64_i64:
523
return C_O1_I1(r, l);
524
- case INDEX_op_qemu_ld_i128:
525
+ case INDEX_op_qemu_ld_a32_i128:
526
+ case INDEX_op_qemu_ld_a64_i128:
527
return C_O2_I1(r, r, l);
528
- case INDEX_op_qemu_st_i32:
529
- case INDEX_op_qemu_st_i64:
530
+ case INDEX_op_qemu_st_a32_i32:
531
+ case INDEX_op_qemu_st_a64_i32:
532
+ case INDEX_op_qemu_st_a32_i64:
533
+ case INDEX_op_qemu_st_a64_i64:
534
return C_O0_I2(lZ, l);
535
- case INDEX_op_qemu_st_i128:
536
+ case INDEX_op_qemu_st_a32_i128:
537
+ case INDEX_op_qemu_st_a64_i128:
538
return C_O0_I3(lZ, lZ, l);
539
540
case INDEX_op_deposit_i32:
541
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
542
index XXXXXXX..XXXXXXX 100644
543
--- a/tcg/arm/tcg-target.c.inc
544
+++ b/tcg/arm/tcg-target.c.inc
545
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
546
ARITH_MOV, args[0], 0, 0);
547
break;
548
549
- case INDEX_op_qemu_ld_i32:
550
- if (TARGET_LONG_BITS == 32) {
551
- tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
552
- args[2], TCG_TYPE_I32);
553
- } else {
554
- tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
555
- args[3], TCG_TYPE_I32);
556
- }
557
+ case INDEX_op_qemu_ld_a32_i32:
558
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
559
break;
560
- case INDEX_op_qemu_ld_i64:
561
- if (TARGET_LONG_BITS == 32) {
562
- tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
563
- args[3], TCG_TYPE_I64);
564
- } else {
565
- tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
566
- args[4], TCG_TYPE_I64);
567
- }
568
+ case INDEX_op_qemu_ld_a64_i32:
569
+ tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
570
+ args[3], TCG_TYPE_I32);
571
break;
572
- case INDEX_op_qemu_st_i32:
573
- if (TARGET_LONG_BITS == 32) {
574
- tcg_out_qemu_st(s, args[0], -1, args[1], -1,
575
- args[2], TCG_TYPE_I32);
576
- } else {
577
- tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
578
- args[3], TCG_TYPE_I32);
579
- }
580
+ case INDEX_op_qemu_ld_a32_i64:
581
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
582
+ args[3], TCG_TYPE_I64);
583
break;
584
- case INDEX_op_qemu_st_i64:
585
- if (TARGET_LONG_BITS == 32) {
586
- tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
587
- args[3], TCG_TYPE_I64);
588
- } else {
589
- tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
590
- args[4], TCG_TYPE_I64);
591
- }
592
+ case INDEX_op_qemu_ld_a64_i64:
593
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
594
+ args[4], TCG_TYPE_I64);
595
+ break;
596
+
597
+ case INDEX_op_qemu_st_a32_i32:
598
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
599
+ break;
600
+ case INDEX_op_qemu_st_a64_i32:
601
+ tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
602
+ args[3], TCG_TYPE_I32);
603
+ break;
604
+ case INDEX_op_qemu_st_a32_i64:
605
+ tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
606
+ args[3], TCG_TYPE_I64);
607
+ break;
608
+ case INDEX_op_qemu_st_a64_i64:
609
+ tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
610
+ args[4], TCG_TYPE_I64);
611
break;
612
613
case INDEX_op_bswap16_i32:
614
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
615
case INDEX_op_setcond2_i32:
616
return C_O1_I4(r, r, r, rI, rI);
617
618
- case INDEX_op_qemu_ld_i32:
619
- return TARGET_LONG_BITS == 32 ? C_O1_I1(r, q) : C_O1_I2(r, q, q);
620
- case INDEX_op_qemu_ld_i64:
621
- return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, q) : C_O2_I2(e, p, q, q);
622
- case INDEX_op_qemu_st_i32:
623
- return TARGET_LONG_BITS == 32 ? C_O0_I2(q, q) : C_O0_I3(q, q, q);
624
- case INDEX_op_qemu_st_i64:
625
- return TARGET_LONG_BITS == 32 ? C_O0_I3(Q, p, q) : C_O0_I4(Q, p, q, q);
626
+ case INDEX_op_qemu_ld_a32_i32:
627
+ return C_O1_I1(r, q);
628
+ case INDEX_op_qemu_ld_a64_i32:
629
+ return C_O1_I2(r, q, q);
630
+ case INDEX_op_qemu_ld_a32_i64:
631
+ return C_O2_I1(e, p, q);
632
+ case INDEX_op_qemu_ld_a64_i64:
633
+ return C_O2_I2(e, p, q, q);
634
+ case INDEX_op_qemu_st_a32_i32:
635
+ return C_O0_I2(q, q);
636
+ case INDEX_op_qemu_st_a64_i32:
637
+ return C_O0_I3(q, q, q);
638
+ case INDEX_op_qemu_st_a32_i64:
639
+ return C_O0_I3(Q, p, q);
640
+ case INDEX_op_qemu_st_a64_i64:
641
+ return C_O0_I4(Q, p, q, q);
642
643
case INDEX_op_st_vec:
644
return C_O0_I2(w, r);
645
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
646
index XXXXXXX..XXXXXXX 100644
647
--- a/tcg/i386/tcg-target.c.inc
648
+++ b/tcg/i386/tcg-target.c.inc
649
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
650
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
651
break;
652
653
- case INDEX_op_qemu_ld_i32:
654
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
655
- tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
656
- } else {
657
+ case INDEX_op_qemu_ld_a64_i32:
658
+ if (TCG_TARGET_REG_BITS == 32) {
659
tcg_out_qemu_ld(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
660
+ break;
661
}
662
+ /* fall through */
663
+ case INDEX_op_qemu_ld_a32_i32:
664
+ tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
665
break;
666
- case INDEX_op_qemu_ld_i64:
667
+ case INDEX_op_qemu_ld_a32_i64:
668
if (TCG_TARGET_REG_BITS == 64) {
669
tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
670
- } else if (TARGET_LONG_BITS == 32) {
671
+ } else {
672
tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
673
+ }
674
+ break;
675
+ case INDEX_op_qemu_ld_a64_i64:
676
+ if (TCG_TARGET_REG_BITS == 64) {
677
+ tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
678
} else {
679
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
680
}
681
break;
682
- case INDEX_op_qemu_ld_i128:
683
+ case INDEX_op_qemu_ld_a32_i128:
684
+ case INDEX_op_qemu_ld_a64_i128:
685
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
686
tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
687
break;
688
- case INDEX_op_qemu_st_i32:
689
- case INDEX_op_qemu_st8_i32:
690
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
691
- tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
692
- } else {
693
+
694
+ case INDEX_op_qemu_st_a64_i32:
695
+ case INDEX_op_qemu_st8_a64_i32:
696
+ if (TCG_TARGET_REG_BITS == 32) {
697
tcg_out_qemu_st(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
698
+ break;
699
}
700
+ /* fall through */
701
+ case INDEX_op_qemu_st_a32_i32:
702
+ case INDEX_op_qemu_st8_a32_i32:
703
+ tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
704
break;
705
- case INDEX_op_qemu_st_i64:
706
+ case INDEX_op_qemu_st_a32_i64:
707
if (TCG_TARGET_REG_BITS == 64) {
708
tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
709
- } else if (TARGET_LONG_BITS == 32) {
710
+ } else {
711
tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
712
+ }
713
+ break;
714
+ case INDEX_op_qemu_st_a64_i64:
715
+ if (TCG_TARGET_REG_BITS == 64) {
716
+ tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
717
} else {
718
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
719
}
720
break;
721
- case INDEX_op_qemu_st_i128:
722
+ case INDEX_op_qemu_st_a32_i128:
723
+ case INDEX_op_qemu_st_a64_i128:
724
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
725
tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
726
break;
727
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
728
case INDEX_op_clz_i64:
729
return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r);
730
731
- case INDEX_op_qemu_ld_i32:
732
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
733
- ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
734
+ case INDEX_op_qemu_ld_a32_i32:
735
+ return C_O1_I1(r, L);
736
+ case INDEX_op_qemu_ld_a64_i32:
737
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O1_I2(r, L, L);
738
739
- case INDEX_op_qemu_st_i32:
740
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
741
- ? C_O0_I2(L, L) : C_O0_I3(L, L, L));
742
- case INDEX_op_qemu_st8_i32:
743
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
744
- ? C_O0_I2(s, L) : C_O0_I3(s, L, L));
745
+ case INDEX_op_qemu_st_a32_i32:
746
+ return C_O0_I2(L, L);
747
+ case INDEX_op_qemu_st_a64_i32:
748
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
749
+ case INDEX_op_qemu_st8_a32_i32:
750
+ return C_O0_I2(s, L);
751
+ case INDEX_op_qemu_st8_a64_i32:
752
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(s, L) : C_O0_I3(s, L, L);
753
754
- case INDEX_op_qemu_ld_i64:
755
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
756
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, L)
757
- : C_O2_I2(r, r, L, L));
758
+ case INDEX_op_qemu_ld_a32_i64:
759
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L);
760
+ case INDEX_op_qemu_ld_a64_i64:
761
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I2(r, r, L, L);
762
763
- case INDEX_op_qemu_st_i64:
764
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L)
765
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(L, L, L)
766
- : C_O0_I4(L, L, L, L));
767
+ case INDEX_op_qemu_st_a32_i64:
768
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
769
+ case INDEX_op_qemu_st_a64_i64:
770
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L);
771
772
- case INDEX_op_qemu_ld_i128:
773
+ case INDEX_op_qemu_ld_a32_i128:
774
+ case INDEX_op_qemu_ld_a64_i128:
775
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
776
return C_O2_I1(r, r, L);
777
- case INDEX_op_qemu_st_i128:
778
+ case INDEX_op_qemu_st_a32_i128:
779
+ case INDEX_op_qemu_st_a64_i128:
780
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
781
return C_O0_I3(L, L, L);
782
783
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
784
index XXXXXXX..XXXXXXX 100644
785
--- a/tcg/loongarch64/tcg-target.c.inc
786
+++ b/tcg/loongarch64/tcg-target.c.inc
787
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
788
tcg_out_ldst(s, OPC_ST_D, a0, a1, a2);
789
break;
790
791
- case INDEX_op_qemu_ld_i32:
792
+ case INDEX_op_qemu_ld_a32_i32:
793
+ case INDEX_op_qemu_ld_a64_i32:
794
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
795
break;
796
- case INDEX_op_qemu_ld_i64:
797
+ case INDEX_op_qemu_ld_a32_i64:
798
+ case INDEX_op_qemu_ld_a64_i64:
799
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
800
break;
801
- case INDEX_op_qemu_st_i32:
802
+ case INDEX_op_qemu_st_a32_i32:
803
+ case INDEX_op_qemu_st_a64_i32:
804
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
805
break;
806
- case INDEX_op_qemu_st_i64:
807
+ case INDEX_op_qemu_st_a32_i64:
808
+ case INDEX_op_qemu_st_a64_i64:
809
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
810
break;
811
812
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
813
case INDEX_op_st32_i64:
814
case INDEX_op_st_i32:
815
case INDEX_op_st_i64:
816
- case INDEX_op_qemu_st_i32:
817
- case INDEX_op_qemu_st_i64:
818
+ case INDEX_op_qemu_st_a32_i32:
819
+ case INDEX_op_qemu_st_a64_i32:
820
+ case INDEX_op_qemu_st_a32_i64:
821
+ case INDEX_op_qemu_st_a64_i64:
822
return C_O0_I2(rZ, r);
823
824
case INDEX_op_brcond_i32:
825
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
826
case INDEX_op_ld32u_i64:
827
case INDEX_op_ld_i32:
828
case INDEX_op_ld_i64:
829
- case INDEX_op_qemu_ld_i32:
830
- case INDEX_op_qemu_ld_i64:
831
+ case INDEX_op_qemu_ld_a32_i32:
832
+ case INDEX_op_qemu_ld_a64_i32:
833
+ case INDEX_op_qemu_ld_a32_i64:
834
+ case INDEX_op_qemu_ld_a64_i64:
835
return C_O1_I1(r, r);
836
837
case INDEX_op_andc_i32:
838
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
839
index XXXXXXX..XXXXXXX 100644
840
--- a/tcg/mips/tcg-target.c.inc
841
+++ b/tcg/mips/tcg-target.c.inc
842
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
843
tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]);
844
break;
845
846
- case INDEX_op_qemu_ld_i32:
847
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
848
- tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
849
- } else {
850
+ case INDEX_op_qemu_ld_a64_i32:
851
+ if (TCG_TARGET_REG_BITS == 32) {
852
tcg_out_qemu_ld(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
853
+ break;
854
}
855
+ /* fall through */
856
+ case INDEX_op_qemu_ld_a32_i32:
857
+ tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
858
break;
859
- case INDEX_op_qemu_ld_i64:
860
+ case INDEX_op_qemu_ld_a32_i64:
861
if (TCG_TARGET_REG_BITS == 64) {
862
tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
863
- } else if (TARGET_LONG_BITS == 32) {
864
+ } else {
865
tcg_out_qemu_ld(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
866
+ }
867
+ break;
868
+ case INDEX_op_qemu_ld_a64_i64:
869
+ if (TCG_TARGET_REG_BITS == 64) {
870
+ tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
871
} else {
872
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
873
}
874
break;
875
- case INDEX_op_qemu_st_i32:
876
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
877
- tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
878
- } else {
879
+
880
+ case INDEX_op_qemu_st_a64_i32:
881
+ if (TCG_TARGET_REG_BITS == 32) {
882
tcg_out_qemu_st(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
883
+ break;
884
}
885
+ /* fall through */
886
+ case INDEX_op_qemu_st_a32_i32:
887
+ tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
888
break;
889
- case INDEX_op_qemu_st_i64:
890
+ case INDEX_op_qemu_st_a32_i64:
891
if (TCG_TARGET_REG_BITS == 64) {
892
tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
893
- } else if (TARGET_LONG_BITS == 32) {
894
+ } else {
895
tcg_out_qemu_st(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
896
+ }
897
+ break;
898
+ case INDEX_op_qemu_st_a64_i64:
899
+ if (TCG_TARGET_REG_BITS == 64) {
900
+ tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
901
} else {
902
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
903
}
904
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
905
case INDEX_op_brcond2_i32:
906
return C_O0_I4(rZ, rZ, rZ, rZ);
907
908
- case INDEX_op_qemu_ld_i32:
909
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
910
- ? C_O1_I1(r, r) : C_O1_I2(r, r, r));
911
- case INDEX_op_qemu_st_i32:
912
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
913
- ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r));
914
- case INDEX_op_qemu_ld_i64:
915
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
916
- : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
917
- : C_O2_I2(r, r, r, r));
918
- case INDEX_op_qemu_st_i64:
919
+ case INDEX_op_qemu_ld_a32_i32:
920
+ return C_O1_I1(r, r);
921
+ case INDEX_op_qemu_ld_a64_i32:
922
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
923
+ case INDEX_op_qemu_st_a32_i32:
924
+ return C_O0_I2(rZ, r);
925
+ case INDEX_op_qemu_st_a64_i32:
926
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r);
927
+ case INDEX_op_qemu_ld_a32_i64:
928
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
929
+ case INDEX_op_qemu_ld_a64_i64:
930
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
931
+ case INDEX_op_qemu_st_a32_i64:
932
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) : C_O0_I3(rZ, rZ, r);
933
+ case INDEX_op_qemu_st_a64_i64:
934
return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r)
935
- : TARGET_LONG_BITS == 32 ? C_O0_I3(rZ, rZ, r)
936
: C_O0_I4(rZ, rZ, r, r));
937
938
default:
939
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
940
index XXXXXXX..XXXXXXX 100644
941
--- a/tcg/ppc/tcg-target.c.inc
942
+++ b/tcg/ppc/tcg-target.c.inc
943
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
944
tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
945
break;
946
947
- case INDEX_op_qemu_ld_i32:
948
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
949
- tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
950
- args[2], TCG_TYPE_I32);
951
- } else {
952
+ case INDEX_op_qemu_ld_a64_i32:
953
+ if (TCG_TARGET_REG_BITS == 32) {
954
tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
955
args[3], TCG_TYPE_I32);
956
+ break;
957
}
958
+ /* fall through */
959
+ case INDEX_op_qemu_ld_a32_i32:
960
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
961
break;
962
- case INDEX_op_qemu_ld_i64:
963
+ case INDEX_op_qemu_ld_a32_i64:
964
if (TCG_TARGET_REG_BITS == 64) {
965
tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
966
args[2], TCG_TYPE_I64);
967
- } else if (TARGET_LONG_BITS == 32) {
968
+ } else {
969
tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
970
args[3], TCG_TYPE_I64);
971
+ }
972
+ break;
973
+ case INDEX_op_qemu_ld_a64_i64:
974
+ if (TCG_TARGET_REG_BITS == 64) {
975
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
976
+ args[2], TCG_TYPE_I64);
977
} else {
978
tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
979
args[4], TCG_TYPE_I64);
980
}
981
break;
982
- case INDEX_op_qemu_ld_i128:
983
+ case INDEX_op_qemu_ld_a32_i128:
984
+ case INDEX_op_qemu_ld_a64_i128:
985
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
986
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
987
break;
988
989
- case INDEX_op_qemu_st_i32:
990
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
991
- tcg_out_qemu_st(s, args[0], -1, args[1], -1,
992
- args[2], TCG_TYPE_I32);
993
- } else {
994
+ case INDEX_op_qemu_st_a64_i32:
995
+ if (TCG_TARGET_REG_BITS == 32) {
996
tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
997
args[3], TCG_TYPE_I32);
998
+ break;
999
}
1000
+ /* fall through */
1001
+ case INDEX_op_qemu_st_a32_i32:
1002
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
1003
break;
1004
- case INDEX_op_qemu_st_i64:
1005
+ case INDEX_op_qemu_st_a32_i64:
1006
if (TCG_TARGET_REG_BITS == 64) {
1007
tcg_out_qemu_st(s, args[0], -1, args[1], -1,
1008
args[2], TCG_TYPE_I64);
1009
- } else if (TARGET_LONG_BITS == 32) {
1010
+ } else {
1011
tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
1012
args[3], TCG_TYPE_I64);
1013
+ }
1014
+ break;
1015
+ case INDEX_op_qemu_st_a64_i64:
1016
+ if (TCG_TARGET_REG_BITS == 64) {
1017
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1,
1018
+ args[2], TCG_TYPE_I64);
1019
} else {
1020
tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
1021
args[4], TCG_TYPE_I64);
1022
}
1023
break;
1024
- case INDEX_op_qemu_st_i128:
1025
+ case INDEX_op_qemu_st_a32_i128:
1026
+ case INDEX_op_qemu_st_a64_i128:
1027
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1028
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
1029
break;
1030
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1031
case INDEX_op_sub2_i32:
1032
return C_O2_I4(r, r, rI, rZM, r, r);
1033
1034
- case INDEX_op_qemu_ld_i32:
1035
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
1036
- ? C_O1_I1(r, r)
1037
- : C_O1_I2(r, r, r));
1038
-
1039
- case INDEX_op_qemu_st_i32:
1040
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
1041
- ? C_O0_I2(r, r)
1042
- : C_O0_I3(r, r, r));
1043
-
1044
- case INDEX_op_qemu_ld_i64:
1045
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
1046
- : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
1047
- : C_O2_I2(r, r, r, r));
1048
-
1049
- case INDEX_op_qemu_st_i64:
1050
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
1051
- : TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
1052
- : C_O0_I4(r, r, r, r));
1053
-
1054
- case INDEX_op_qemu_ld_i128:
1055
+ case INDEX_op_qemu_ld_a32_i32:
1056
+ return C_O1_I1(r, r);
1057
+ case INDEX_op_qemu_ld_a64_i32:
1058
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
1059
+ case INDEX_op_qemu_ld_a32_i64:
1060
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
1061
+ case INDEX_op_qemu_ld_a64_i64:
1062
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
1063
+ case INDEX_op_qemu_ld_a32_i128:
1064
+ case INDEX_op_qemu_ld_a64_i128:
1065
return C_O2_I1(o, m, r);
1066
- case INDEX_op_qemu_st_i128:
1067
+
1068
+ case INDEX_op_qemu_st_a32_i32:
1069
+ return C_O0_I2(r, r);
1070
+ case INDEX_op_qemu_st_a64_i32:
1071
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1072
+ case INDEX_op_qemu_st_a32_i64:
1073
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1074
+ case INDEX_op_qemu_st_a64_i64:
1075
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
1076
+ case INDEX_op_qemu_st_a32_i128:
1077
+ case INDEX_op_qemu_st_a64_i128:
1078
return C_O0_I3(o, m, r);
1079
1080
case INDEX_op_add_vec:
1081
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
1082
index XXXXXXX..XXXXXXX 100644
1083
--- a/tcg/riscv/tcg-target.c.inc
1084
+++ b/tcg/riscv/tcg-target.c.inc
1085
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1086
tcg_out_setcond(s, args[3], a0, a1, a2);
1087
break;
1088
1089
- case INDEX_op_qemu_ld_i32:
1090
+ case INDEX_op_qemu_ld_a32_i32:
1091
+ case INDEX_op_qemu_ld_a64_i32:
1092
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
1093
break;
1094
- case INDEX_op_qemu_ld_i64:
1095
+ case INDEX_op_qemu_ld_a32_i64:
1096
+ case INDEX_op_qemu_ld_a64_i64:
1097
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
1098
break;
1099
- case INDEX_op_qemu_st_i32:
1100
+ case INDEX_op_qemu_st_a32_i32:
1101
+ case INDEX_op_qemu_st_a64_i32:
1102
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
1103
break;
1104
- case INDEX_op_qemu_st_i64:
1105
+ case INDEX_op_qemu_st_a32_i64:
1106
+ case INDEX_op_qemu_st_a64_i64:
1107
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
1108
break;
1109
1110
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1111
case INDEX_op_sub2_i64:
1112
return C_O2_I4(r, r, rZ, rZ, rM, rM);
1113
1114
- case INDEX_op_qemu_ld_i32:
1115
- case INDEX_op_qemu_ld_i64:
1116
+ case INDEX_op_qemu_ld_a32_i32:
1117
+ case INDEX_op_qemu_ld_a64_i32:
1118
+ case INDEX_op_qemu_ld_a32_i64:
1119
+ case INDEX_op_qemu_ld_a64_i64:
1120
return C_O1_I1(r, r);
1121
- case INDEX_op_qemu_st_i32:
1122
- case INDEX_op_qemu_st_i64:
1123
+ case INDEX_op_qemu_st_a32_i32:
1124
+ case INDEX_op_qemu_st_a64_i32:
1125
+ case INDEX_op_qemu_st_a32_i64:
1126
+ case INDEX_op_qemu_st_a64_i64:
1127
return C_O0_I2(rZ, r);
1128
1129
default:
1130
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
1131
index XXXXXXX..XXXXXXX 100644
1132
--- a/tcg/s390x/tcg-target.c.inc
1133
+++ b/tcg/s390x/tcg-target.c.inc
1134
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1135
args[2], const_args[2], args[3], const_args[3], args[4]);
1136
break;
1137
1138
- case INDEX_op_qemu_ld_i32:
1139
+ case INDEX_op_qemu_ld_a32_i32:
1140
+ case INDEX_op_qemu_ld_a64_i32:
1141
tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32);
1142
break;
1143
- case INDEX_op_qemu_ld_i64:
1144
+ case INDEX_op_qemu_ld_a32_i64:
1145
+ case INDEX_op_qemu_ld_a64_i64:
1146
tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64);
1147
break;
1148
- case INDEX_op_qemu_st_i32:
1149
+ case INDEX_op_qemu_st_a32_i32:
1150
+ case INDEX_op_qemu_st_a64_i32:
1151
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32);
1152
break;
1153
- case INDEX_op_qemu_st_i64:
1154
+ case INDEX_op_qemu_st_a32_i64:
1155
+ case INDEX_op_qemu_st_a64_i64:
1156
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
1157
break;
1158
- case INDEX_op_qemu_ld_i128:
1159
+ case INDEX_op_qemu_ld_a32_i128:
1160
+ case INDEX_op_qemu_ld_a64_i128:
1161
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
1162
break;
1163
- case INDEX_op_qemu_st_i128:
1164
+ case INDEX_op_qemu_st_a32_i128:
1165
+ case INDEX_op_qemu_st_a64_i128:
1166
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
1167
break;
1168
1169
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1170
case INDEX_op_ctpop_i64:
1171
return C_O1_I1(r, r);
1172
1173
- case INDEX_op_qemu_ld_i32:
1174
- case INDEX_op_qemu_ld_i64:
1175
+ case INDEX_op_qemu_ld_a32_i32:
1176
+ case INDEX_op_qemu_ld_a64_i32:
1177
+ case INDEX_op_qemu_ld_a32_i64:
1178
+ case INDEX_op_qemu_ld_a64_i64:
1179
return C_O1_I1(r, r);
1180
- case INDEX_op_qemu_st_i64:
1181
- case INDEX_op_qemu_st_i32:
1182
+ case INDEX_op_qemu_st_a32_i64:
1183
+ case INDEX_op_qemu_st_a64_i64:
1184
+ case INDEX_op_qemu_st_a32_i32:
1185
+ case INDEX_op_qemu_st_a64_i32:
1186
return C_O0_I2(r, r);
1187
- case INDEX_op_qemu_ld_i128:
1188
+ case INDEX_op_qemu_ld_a32_i128:
1189
+ case INDEX_op_qemu_ld_a64_i128:
1190
return C_O2_I1(o, m, r);
1191
- case INDEX_op_qemu_st_i128:
1192
+ case INDEX_op_qemu_st_a32_i128:
1193
+ case INDEX_op_qemu_st_a64_i128:
1194
return C_O0_I3(o, m, r);
1195
1196
case INDEX_op_deposit_i32:
1197
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
1198
index XXXXXXX..XXXXXXX 100644
1199
--- a/tcg/sparc64/tcg-target.c.inc
1200
+++ b/tcg/sparc64/tcg-target.c.inc
1201
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1202
tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
1203
break;
1204
1205
- case INDEX_op_qemu_ld_i32:
1206
+ case INDEX_op_qemu_ld_a32_i32:
1207
+ case INDEX_op_qemu_ld_a64_i32:
1208
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
1209
break;
1210
- case INDEX_op_qemu_ld_i64:
1211
+ case INDEX_op_qemu_ld_a32_i64:
1212
+ case INDEX_op_qemu_ld_a64_i64:
1213
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
1214
break;
1215
- case INDEX_op_qemu_st_i32:
1216
+ case INDEX_op_qemu_st_a32_i32:
1217
+ case INDEX_op_qemu_st_a64_i32:
1218
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
1219
break;
1220
- case INDEX_op_qemu_st_i64:
1221
+ case INDEX_op_qemu_st_a32_i64:
1222
+ case INDEX_op_qemu_st_a64_i64:
1223
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
1224
break;
1225
1226
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1227
case INDEX_op_extu_i32_i64:
1228
case INDEX_op_extrl_i64_i32:
1229
case INDEX_op_extrh_i64_i32:
1230
- case INDEX_op_qemu_ld_i32:
1231
- case INDEX_op_qemu_ld_i64:
1232
+ case INDEX_op_qemu_ld_a32_i32:
1233
+ case INDEX_op_qemu_ld_a64_i32:
1234
+ case INDEX_op_qemu_ld_a32_i64:
1235
+ case INDEX_op_qemu_ld_a64_i64:
1236
return C_O1_I1(r, r);
1237
1238
case INDEX_op_st8_i32:
1239
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1240
case INDEX_op_st_i32:
1241
case INDEX_op_st32_i64:
1242
case INDEX_op_st_i64:
1243
- case INDEX_op_qemu_st_i32:
1244
- case INDEX_op_qemu_st_i64:
1245
+ case INDEX_op_qemu_st_a32_i32:
1246
+ case INDEX_op_qemu_st_a64_i32:
1247
+ case INDEX_op_qemu_st_a32_i64:
1248
+ case INDEX_op_qemu_st_a64_i64:
1249
return C_O0_I2(rZ, r);
1250
1251
case INDEX_op_add_i32:
1252
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
1253
index XXXXXXX..XXXXXXX 100644
1254
--- a/tcg/tci/tcg-target.c.inc
1255
+++ b/tcg/tci/tcg-target.c.inc
1256
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1257
case INDEX_op_setcond2_i32:
1258
return C_O1_I4(r, r, r, r, r);
1259
1260
- case INDEX_op_qemu_ld_i32:
1261
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
1262
- ? C_O1_I1(r, r)
1263
- : C_O1_I2(r, r, r));
1264
- case INDEX_op_qemu_ld_i64:
1265
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
1266
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, r)
1267
- : C_O2_I2(r, r, r, r));
1268
- case INDEX_op_qemu_st_i32:
1269
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
1270
- ? C_O0_I2(r, r)
1271
- : C_O0_I3(r, r, r));
1272
- case INDEX_op_qemu_st_i64:
1273
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
1274
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(r, r, r)
1275
- : C_O0_I4(r, r, r, r));
1276
+ case INDEX_op_qemu_ld_a32_i32:
1277
+ return C_O1_I1(r, r);
1278
+ case INDEX_op_qemu_ld_a64_i32:
1279
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
1280
+ case INDEX_op_qemu_ld_a32_i64:
1281
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
1282
+ case INDEX_op_qemu_ld_a64_i64:
1283
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
1284
+ case INDEX_op_qemu_st_a32_i32:
1285
+ return C_O0_I2(r, r);
1286
+ case INDEX_op_qemu_st_a64_i32:
1287
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1288
+ case INDEX_op_qemu_st_a32_i64:
1289
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1290
+ case INDEX_op_qemu_st_a64_i64:
1291
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
1292
1293
default:
1294
g_assert_not_reached();
1295
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1296
tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]);
1297
break;
1298
1299
- case INDEX_op_qemu_ld_i32:
1300
- case INDEX_op_qemu_st_i32:
1301
+ case INDEX_op_qemu_ld_a32_i32:
1302
+ case INDEX_op_qemu_ld_a64_i32:
1303
+ case INDEX_op_qemu_st_a32_i32:
1304
+ case INDEX_op_qemu_st_a64_i32:
1305
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
1306
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
1307
} else {
1308
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1309
}
1310
break;
1311
1312
- case INDEX_op_qemu_ld_i64:
1313
- case INDEX_op_qemu_st_i64:
1314
+ case INDEX_op_qemu_ld_a32_i64:
1315
+ case INDEX_op_qemu_ld_a64_i64:
1316
+ case INDEX_op_qemu_st_a32_i64:
1317
+ case INDEX_op_qemu_st_a64_i64:
1318
if (TCG_TARGET_REG_BITS == 64) {
1319
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
1320
} else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
1321
--
1322
2.34.1
1323
1324
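As an aside, the new opcode names used throughout the patch above follow one scheme; the comment below is an informal reading of it, not text taken from the source:

/*
 * Informal reading of the opcode naming (a sketch, not a quote from
 * the source):
 *     INDEX_op_qemu_<ld|st>[8]_a<A>_i<D>
 *         aA -- width of the guest address operand: 32 or 64 bits
 *         iD -- width of the data operand: 32, 64 or 128 bits
 * e.g. INDEX_op_qemu_ld_a64_i32 loads 32 bits of data through a
 * 64-bit guest address, so a backend no longer needs TARGET_LONG_BITS
 * to know how many address registers it is given.
 */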
New patch
1
We now have the address size as part of the opcode, so
2
we no longer need to test TARGET_LONG_BITS. We can use
3
uint64_t for target_ulong, as passed into load/store helpers.
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/tci.c | 61 +++++++++++++++++++++++++---------------
9
tcg/tci/tcg-target.c.inc | 15 +++++-----
10
2 files changed, 46 insertions(+), 30 deletions(-)
11
12
diff --git a/tcg/tci.c b/tcg/tci.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/tci.c
15
+++ b/tcg/tci.c
16
@@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
17
return result;
18
}
19
20
-static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
21
+static uint64_t tci_qemu_ld(CPUArchState *env, uint64_t taddr,
22
MemOpIdx oi, const void *tb_ptr)
23
{
24
MemOp mop = get_memop(oi);
25
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
26
}
27
}
28
29
-static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
30
+static void tci_qemu_st(CPUArchState *env, uint64_t taddr, uint64_t val,
31
MemOpIdx oi, const void *tb_ptr)
32
{
33
MemOp mop = get_memop(oi);
34
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
35
TCGReg r0, r1, r2, r3, r4, r5;
36
tcg_target_ulong t1;
37
TCGCond condition;
38
- target_ulong taddr;
39
uint8_t pos, len;
40
uint32_t tmp32;
41
- uint64_t tmp64;
42
+ uint64_t tmp64, taddr;
43
uint64_t T1, T2;
44
MemOpIdx oi;
45
int32_t ofs;
46
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
47
break;
48
49
case INDEX_op_qemu_ld_a32_i32:
50
+ tci_args_rrm(insn, &r0, &r1, &oi);
51
+ taddr = (uint32_t)regs[r1];
52
+ goto do_ld_i32;
53
case INDEX_op_qemu_ld_a64_i32:
54
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
55
+ if (TCG_TARGET_REG_BITS == 64) {
56
tci_args_rrm(insn, &r0, &r1, &oi);
57
taddr = regs[r1];
58
} else {
59
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
60
taddr = tci_uint64(regs[r2], regs[r1]);
61
}
62
- tmp32 = tci_qemu_ld(env, taddr, oi, tb_ptr);
63
- regs[r0] = tmp32;
64
+ do_ld_i32:
65
+ regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr);
66
break;
67
68
case INDEX_op_qemu_ld_a32_i64:
69
+ if (TCG_TARGET_REG_BITS == 64) {
70
+ tci_args_rrm(insn, &r0, &r1, &oi);
71
+ taddr = (uint32_t)regs[r1];
72
+ } else {
73
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
74
+ taddr = (uint32_t)regs[r2];
75
+ }
76
+ goto do_ld_i64;
77
case INDEX_op_qemu_ld_a64_i64:
78
if (TCG_TARGET_REG_BITS == 64) {
79
tci_args_rrm(insn, &r0, &r1, &oi);
80
taddr = regs[r1];
81
- } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
82
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
83
- taddr = regs[r2];
84
} else {
85
tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
86
taddr = tci_uint64(regs[r3], regs[r2]);
87
oi = regs[r4];
88
}
89
+ do_ld_i64:
90
tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr);
91
if (TCG_TARGET_REG_BITS == 32) {
92
tci_write_reg64(regs, r1, r0, tmp64);
93
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
94
break;
95
96
case INDEX_op_qemu_st_a32_i32:
97
+ tci_args_rrm(insn, &r0, &r1, &oi);
98
+ taddr = (uint32_t)regs[r1];
99
+ goto do_st_i32;
100
case INDEX_op_qemu_st_a64_i32:
101
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
102
+ if (TCG_TARGET_REG_BITS == 64) {
103
tci_args_rrm(insn, &r0, &r1, &oi);
104
taddr = regs[r1];
105
} else {
106
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
107
taddr = tci_uint64(regs[r2], regs[r1]);
108
}
109
- tmp32 = regs[r0];
110
- tci_qemu_st(env, taddr, tmp32, oi, tb_ptr);
111
+ do_st_i32:
112
+ tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr);
113
break;
114
115
case INDEX_op_qemu_st_a32_i64:
116
+ if (TCG_TARGET_REG_BITS == 64) {
117
+ tci_args_rrm(insn, &r0, &r1, &oi);
118
+ tmp64 = regs[r0];
119
+ taddr = (uint32_t)regs[r1];
120
+ } else {
121
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
122
+ tmp64 = tci_uint64(regs[r1], regs[r0]);
123
+ taddr = (uint32_t)regs[r2];
124
+ }
125
+ goto do_st_i64;
126
case INDEX_op_qemu_st_a64_i64:
127
if (TCG_TARGET_REG_BITS == 64) {
128
tci_args_rrm(insn, &r0, &r1, &oi);
129
- taddr = regs[r1];
130
tmp64 = regs[r0];
131
+ taddr = regs[r1];
132
} else {
133
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
134
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
135
- taddr = regs[r2];
136
- } else {
137
- tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
138
- taddr = tci_uint64(regs[r3], regs[r2]);
139
- oi = regs[r4];
140
- }
141
+ tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
142
tmp64 = tci_uint64(regs[r1], regs[r0]);
143
+ taddr = tci_uint64(regs[r3], regs[r2]);
144
+ oi = regs[r4];
145
}
146
+ do_st_i64:
147
tci_qemu_st(env, taddr, tmp64, oi, tb_ptr);
148
break;
149
150
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
151
index XXXXXXX..XXXXXXX 100644
152
--- a/tcg/tci/tcg-target.c.inc
153
+++ b/tcg/tci/tcg-target.c.inc
154
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
155
return false;
156
}
157
158
-static void stack_bounds_check(TCGReg base, target_long offset)
159
+static void stack_bounds_check(TCGReg base, intptr_t offset)
160
{
161
if (base == TCG_REG_CALL_STACK) {
162
tcg_debug_assert(offset >= 0);
163
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
164
break;
165
166
case INDEX_op_qemu_ld_a32_i32:
167
- case INDEX_op_qemu_ld_a64_i32:
168
case INDEX_op_qemu_st_a32_i32:
169
+ tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
170
+ break;
171
+ case INDEX_op_qemu_ld_a64_i32:
172
case INDEX_op_qemu_st_a64_i32:
173
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
174
+ case INDEX_op_qemu_ld_a32_i64:
175
+ case INDEX_op_qemu_st_a32_i64:
176
+ if (TCG_TARGET_REG_BITS == 64) {
177
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
178
} else {
179
tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
180
}
181
break;
182
-
183
- case INDEX_op_qemu_ld_a32_i64:
184
case INDEX_op_qemu_ld_a64_i64:
185
- case INDEX_op_qemu_st_a32_i64:
186
case INDEX_op_qemu_st_a64_i64:
187
if (TCG_TARGET_REG_BITS == 64) {
188
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
189
- } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
190
- tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
191
} else {
192
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]);
193
tcg_out_op_rrrrr(s, opc, args[0], args[1],
194
--
195
2.34.1
196
197
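Worth spelling out for the patch above: on a 32-bit host a 64-bit guest address arrives in two 32-bit registers and is reassembled with tci_uint64(). A self-contained sketch of that reassembly (the function name below is made up; only tci_uint64() itself exists in tcg/tci.c):

#include <stdint.h>

/* Mirrors what tci_uint64(regs[r3], regs[r2]) does in the interpreter:
 * join the high and low register halves into one 64-bit value, used
 * here for the guest address (taddr) and for 64-bit store data. */
static inline uint64_t tci_uint64_sketch(uint32_t high, uint32_t low)
{
    return ((uint64_t)high << 32) | low;
}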
New patch
1
Keep all 32-bit values zero-extended in the register, not solely when
2
addresses are 32 bits. This eliminates a dependency on TARGET_LONG_BITS.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/i386/tcg-target.h | 6 +++---
8
1 file changed, 3 insertions(+), 3 deletions(-)
9
10
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/i386/tcg-target.h
13
+++ b/tcg/i386/tcg-target.h
14
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
15
#define TCG_TARGET_HAS_mulsh_i32 0
16
17
#if TCG_TARGET_REG_BITS == 64
18
-/* Keep target addresses zero-extended in a register. */
19
-#define TCG_TARGET_HAS_extrl_i64_i32 (TARGET_LONG_BITS == 32)
20
-#define TCG_TARGET_HAS_extrh_i64_i32 (TARGET_LONG_BITS == 32)
21
+/* Keep 32-bit values zero-extended in a register. */
22
+#define TCG_TARGET_HAS_extrl_i64_i32 1
23
+#define TCG_TARGET_HAS_extrh_i64_i32 1
24
#define TCG_TARGET_HAS_div2_i64 1
25
#define TCG_TARGET_HAS_rot_i64 1
26
#define TCG_TARGET_HAS_ext8s_i64 1
27
--
28
2.34.1
29
30
New patch
1
Since TCG_TYPE_I32 values are kept zero-extended in registers, via
2
omission of the REXW bit, we need not extend if the register matches.
3
This is already relied upon by qemu_{ld,st}.
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/i386/tcg-target.c.inc | 4 +++-
10
1 file changed, 3 insertions(+), 1 deletion(-)
11
12
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/i386/tcg-target.c.inc
15
+++ b/tcg/i386/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
17
18
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
19
{
20
- tcg_out_ext32u(s, dest, src);
21
+ if (dest != src) {
22
+ tcg_out_ext32u(s, dest, src);
23
+ }
24
}
25
26
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src)
27
--
28
2.34.1
29
30
New patch
1
Because of its use in tgen_arithi, this value must be a signed
2
32-bit quantity, as that is what may be encoded in the insn.
3
The truncation of the value to unsigned for 32-bit guests is
4
done via the REX bit carried in 'trexw'.
1
5
6
Removes the only uses of target_ulong from this tcg backend.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
tcg/i386/tcg-target.c.inc | 4 ++--
12
1 file changed, 2 insertions(+), 2 deletions(-)
13
14
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/i386/tcg-target.c.inc
17
+++ b/tcg/i386/tcg-target.c.inc
18
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
19
int trexw = 0, hrexw = 0, tlbrexw = 0;
20
unsigned mem_index = get_mmuidx(oi);
21
unsigned s_mask = (1 << s_bits) - 1;
22
- target_ulong tlb_mask;
23
+ int tlb_mask;
24
25
ldst = new_ldst_label(s);
26
ldst->is_ld = is_ld;
27
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
28
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
29
addrlo, s_mask - a_mask);
30
}
31
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
32
+ tlb_mask = TARGET_PAGE_MASK | a_mask;
33
tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
34
35
/* cmp 0(TCG_REG_L0), TCG_REG_L1 */
36
--
37
2.34.1
38
39
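A small numeric illustration of why a plain int suffices for tlb_mask (the page size and alignment mask below are example values, not taken from the patch):

#include <stdio.h>

int main(void)
{
    /* Example: 4 KiB pages, 8-byte alignment requirement. */
    int page_mask = -4096;           /* TARGET_PAGE_MASK is negative */
    int a_mask = 7;
    int tlb_mask = page_mask | a_mask;

    /* Still a signed 32-bit value, so it fits the imm32 field of the
     * AND; with trexw clear the operation is 32 bits wide, which is
     * where the truncation for 32-bit guests happens. */
    printf("tlb_mask = %#x\n", (unsigned)tlb_mask);   /* 0xfffff007 */
    return 0;
}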
New patch
1
All uses can be inferred from the INDEX_op_qemu_*_a{32,64}_* opcode
2
being used. Add a field into TCGLabelQemuLdst to record the usage.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/i386/tcg-target.c.inc | 8 +++-----
8
1 file changed, 3 insertions(+), 5 deletions(-)
9
10
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/i386/tcg-target.c.inc
13
+++ b/tcg/i386/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
15
ldst->addrhi_reg = addrhi;
16
17
if (TCG_TARGET_REG_BITS == 64) {
18
- if (TARGET_LONG_BITS == 64) {
19
- ttype = TCG_TYPE_I64;
20
- trexw = P_REXW;
21
- }
22
+ ttype = s->addr_type;
23
+ trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
24
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
25
hrexw = P_REXW;
26
if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
27
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
28
ldst->label_ptr[0] = s->code_ptr;
29
s->code_ptr += 4;
30
31
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
32
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) {
33
/* cmp 4(TCG_REG_L0), addrhi */
34
tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
35
36
--
37
2.34.1
38
39
1
This bit is not saved across interrupts, so we must
1
All uses can be inferred from the INDEX_op_qemu_*_a{32,64}_*
2
delay delivering the interrupt until the skip has
2
opcode being used.
3
been processed.
4
3
5
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1118
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Michael Rolnik <mrolnik@gmail.com>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
6
---
10
target/avr/helper.c | 9 +++++++++
7
tcg/arm/tcg-target.c.inc | 14 +++++++-------
11
target/avr/translate.c | 26 ++++++++++++++++++++++----
8
1 file changed, 7 insertions(+), 7 deletions(-)
12
2 files changed, 31 insertions(+), 4 deletions(-)
13
9
14
diff --git a/target/avr/helper.c b/target/avr/helper.c
10
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/target/avr/helper.c
12
--- a/tcg/arm/tcg-target.c.inc
17
+++ b/target/avr/helper.c
13
+++ b/tcg/arm/tcg-target.c.inc
18
@@ -XXX,XX +XXX,XX @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
14
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
19
AVRCPU *cpu = AVR_CPU(cs);
15
* Load the tlb comparator into R2/R3 and the fast path addend into R1.
20
CPUAVRState *env = &cpu->env;
16
*/
21
17
if (cmp_off == 0) {
22
+ /*
18
- if (TARGET_LONG_BITS == 64) {
23
+ * We cannot separate a skip from the next instruction,
19
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
24
+ * as the skip would not be preserved across the interrupt.
20
- } else {
25
+ * Separating the two insn normally only happens at page boundaries.
21
+ if (s->addr_type == TCG_TYPE_I32) {
26
+ */
22
tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
27
+ if (env->skip) {
23
+ } else {
28
+ return false;
24
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
29
+ }
25
}
30
+
26
} else {
31
if (interrupt_request & CPU_INTERRUPT_RESET) {
27
tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
32
if (cpu_interrupts_enabled(env)) {
28
TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
33
cs->exception_index = EXCP_RESET;
29
- if (TARGET_LONG_BITS == 64) {
34
diff --git a/target/avr/translate.c b/target/avr/translate.c
30
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
35
index XXXXXXX..XXXXXXX 100644
31
- } else {
36
--- a/target/avr/translate.c
32
+ if (s->addr_type == TCG_TYPE_I32) {
37
+++ b/target/avr/translate.c
33
tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
38
@@ -XXX,XX +XXX,XX @@ static void avr_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs)
34
+ } else {
39
if (skip_label) {
35
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
40
canonicalize_skip(ctx);
41
gen_set_label(skip_label);
42
- if (ctx->base.is_jmp == DISAS_NORETURN) {
43
+
44
+ switch (ctx->base.is_jmp) {
45
+ case DISAS_NORETURN:
46
ctx->base.is_jmp = DISAS_CHAIN;
47
+ break;
48
+ case DISAS_NEXT:
49
+ if (ctx->base.tb->flags & TB_FLAGS_SKIP) {
50
+ ctx->base.is_jmp = DISAS_TOO_MANY;
51
+ }
52
+ break;
53
+ default:
54
+ break;
55
}
36
}
56
}
37
}
57
38
58
@@ -XXX,XX +XXX,XX @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
39
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
59
{
40
SHIFT_IMM_LSL(TARGET_PAGE_BITS));
60
DisasContext *ctx = container_of(dcbase, DisasContext, base);
41
}
61
bool nonconst_skip = canonicalize_skip(ctx);
42
62
+ /*
43
- if (TARGET_LONG_BITS == 64) {
63
+ * Because we disable interrupts while env->skip is set,
44
+ if (s->addr_type != TCG_TYPE_I32) {
64
+ * we must return to the main loop to re-evaluate afterward.
45
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
65
+ */
46
}
66
+ bool force_exit = ctx->base.tb->flags & TB_FLAGS_SKIP;
47
#else
67
68
switch (ctx->base.is_jmp) {
69
case DISAS_NORETURN:
70
@@ -XXX,XX +XXX,XX @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
71
case DISAS_NEXT:
72
case DISAS_TOO_MANY:
73
case DISAS_CHAIN:
74
- if (!nonconst_skip) {
75
+ if (!nonconst_skip && !force_exit) {
76
/* Note gen_goto_tb checks singlestep. */
77
gen_goto_tb(ctx, 1, ctx->npc);
78
break;
79
@@ -XXX,XX +XXX,XX @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
80
tcg_gen_movi_tl(cpu_pc, ctx->npc);
81
/* fall through */
82
case DISAS_LOOKUP:
83
- tcg_gen_lookup_and_goto_ptr();
84
- break;
85
+ if (!force_exit) {
86
+ tcg_gen_lookup_and_goto_ptr();
87
+ break;
88
+ }
89
+ /* fall through */
90
case DISAS_EXIT:
91
tcg_gen_exit_tb(NULL, 0);
92
break;
93
--
48
--
94
2.34.1
49
2.34.1
95
50
96
51
New patch
1
Eliminate the test vs TARGET_LONG_BITS by considering this
2
predicate to be always true, and simplify accordingly.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/aarch64/tcg-target.c.inc | 19 +++++++++----------
8
1 file changed, 9 insertions(+), 10 deletions(-)
9
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/aarch64/tcg-target.c.inc
13
+++ b/tcg/aarch64/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ bool have_lse2;
15
#define TCG_VEC_TMP0 TCG_REG_V31
16
17
#ifndef CONFIG_SOFTMMU
18
-/* Note that XZR cannot be encoded in the address base register slot,
19
- as that actaully encodes SP. So if we need to zero-extend the guest
20
- address, via the address index register slot, we need to load even
21
- a zero guest base into a register. */
22
-#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
23
#define TCG_REG_GUEST_BASE TCG_REG_X28
24
#endif
25
26
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
27
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
28
}
29
30
- if (USE_GUEST_BASE) {
31
+ if (guest_base || addr_type == TCG_TYPE_I32) {
32
h->base = TCG_REG_GUEST_BASE;
33
h->index = addr_reg;
34
h->index_ext = addr_type;
35
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
36
CPU_TEMP_BUF_NLONGS * sizeof(long));
37
38
#if !defined(CONFIG_SOFTMMU)
39
- if (USE_GUEST_BASE) {
40
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
41
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
42
- }
43
+ /*
44
+ * Note that XZR cannot be encoded in the address base register slot,
45
+ * as that actaully encodes SP. Depending on the guest, we may need
46
+ * to zero-extend the guest address via the address index register slot,
47
+ * therefore we need to load even a zero guest base into a register.
48
+ */
49
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
50
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
51
#endif
52
53
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
54
--
55
2.34.1
56
57
New patch
1
All uses replaced with TCGContext.addr_type.
1
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/aarch64/tcg-target.c.inc | 11 +++++------
7
1 file changed, 5 insertions(+), 6 deletions(-)
8
9
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/aarch64/tcg-target.c.inc
12
+++ b/tcg/aarch64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
14
TCGReg addr_reg, MemOpIdx oi,
15
bool is_ld)
16
{
17
- TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
18
+ TCGType addr_type = s->addr_type;
19
TCGLabelQemuLdst *ldst = NULL;
20
MemOp opc = get_memop(oi);
21
MemOp s_bits = opc & MO_SIZE;
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
23
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
24
25
/* Load the tlb comparator into X0, and the fast path addend into X1. */
26
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
27
+ tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
28
is_ld ? offsetof(CPUTLBEntry, addr_read)
29
: offsetof(CPUTLBEntry, addr_write));
30
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
31
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
32
if (a_mask >= s_mask) {
33
x3 = addr_reg;
34
} else {
35
- tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
36
+ tcg_out_insn(s, 3401, ADDI, addr_type,
37
TCG_REG_X3, addr_reg, s_mask - a_mask);
38
x3 = TCG_REG_X3;
39
}
40
compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
41
42
/* Store the page mask part of the address into X3. */
43
- tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
44
- TCG_REG_X3, x3, compare_mask);
45
+ tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
46
47
/* Perform the address comparison. */
48
- tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
49
+ tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);
50
51
/* If not equal, we jump to the slow path. */
52
ldst->label_ptr[0] = s->code_ptr;
53
--
54
2.34.1
55
56
New patch
1
All uses replaced with TCGContext.addr_type.
1
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/loongarch64/tcg-target.c.inc | 9 +++++----
7
1 file changed, 5 insertions(+), 4 deletions(-)
8
9
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/loongarch64/tcg-target.c.inc
12
+++ b/tcg/loongarch64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
14
TCGReg addr_reg, MemOpIdx oi,
15
bool is_ld)
16
{
17
+ TCGType addr_type = s->addr_type;
18
TCGLabelQemuLdst *ldst = NULL;
19
MemOp opc = get_memop(oi);
20
MemOp a_bits;
21
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
22
tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
23
24
/* Load the tlb comparator and the addend. */
25
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
26
+ tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
27
is_ld ? offsetof(CPUTLBEntry, addr_read)
28
: offsetof(CPUTLBEntry, addr_write));
29
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
30
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
31
if (a_bits < s_bits) {
32
unsigned a_mask = (1u << a_bits) - 1;
33
unsigned s_mask = (1u << s_bits) - 1;
34
- tcg_out_addi(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
35
+ tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
36
} else {
37
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg);
38
+ tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
39
}
40
tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
41
a_bits, TARGET_PAGE_BITS - 1);
42
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
43
h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
44
#endif
45
46
- if (TARGET_LONG_BITS == 32) {
47
+ if (addr_type == TCG_TYPE_I32) {
48
h->base = TCG_REG_TMP0;
49
tcg_out_ext32u(s, h->base, addr_reg);
50
} else {
51
--
52
2.34.1
53
54
New patch
1
All uses replaced with TCGContext.addr_type.
1
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/mips/tcg-target.c.inc | 42 +++++++++++++++++++++------------------
7
1 file changed, 23 insertions(+), 19 deletions(-)
8
9
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/mips/tcg-target.c.inc
12
+++ b/tcg/mips/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ typedef enum {
14
/* Aliases for convenience. */
15
ALIAS_PADD = sizeof(void *) == 4 ? OPC_ADDU : OPC_DADDU,
16
ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
17
- ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
18
- ? OPC_SRL : OPC_DSRL,
19
- ALIAS_TADDI = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
20
- ? OPC_ADDIU : OPC_DADDIU,
21
} MIPSInsn;
22
23
/*
24
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
25
TCGReg addrlo, TCGReg addrhi,
26
MemOpIdx oi, bool is_ld)
27
{
28
+ TCGType addr_type = s->addr_type;
29
TCGLabelQemuLdst *ldst = NULL;
30
MemOp opc = get_memop(oi);
31
MemOp a_bits;
32
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
33
tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
34
35
/* Extract the TLB index from the address into TMP3. */
36
- tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrlo,
37
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
38
+ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
39
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
40
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
41
+ } else {
42
+ tcg_out_dsrl(s, TCG_TMP3, addrlo,
43
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
44
+ }
45
tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
46
47
/* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
48
tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
49
50
- /* Load the (low-half) tlb comparator. */
51
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
52
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
53
- } else {
54
- tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_TMP3, cmp_off);
55
- }
56
-
57
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
58
+ if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
59
+ /* Load the tlb comparator. */
60
+ tcg_out_ld(s, addr_type, TCG_TMP0, TCG_TMP3, cmp_off);
61
/* Load the tlb addend for the fast path. */
62
tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
63
+ } else {
64
+ /* Load the low half of the tlb comparator. */
65
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
66
}
67
68
/*
69
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
70
* For unaligned accesses, compare against the end of the access to
71
* verify that it does not cross a page boundary.
72
*/
73
- tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
74
+ tcg_out_movi(s, addr_type, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
75
if (a_mask < s_mask) {
76
- tcg_out_opc_imm(s, ALIAS_TADDI, TCG_TMP2, addrlo, s_mask - a_mask);
77
+ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
78
+ tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
79
+ } else {
80
+ tcg_out_opc_imm(s, OPC_DADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
81
+ }
82
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
83
} else {
84
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
85
}
86
87
/* Zero extend a 32-bit guest address for a 64-bit host. */
88
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
89
+ if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
90
tcg_out_ext32u(s, TCG_TMP2, addrlo);
91
addrlo = TCG_TMP2;
92
}
93
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
94
tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
95
96
/* Load and test the high half tlb comparator. */
97
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
98
+ if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
99
/* delay slot */
100
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
101
102
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
103
}
104
105
base = addrlo;
106
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
107
+ if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
108
tcg_out_ext32u(s, TCG_REG_A0, base);
109
base = TCG_REG_A0;
110
}
111
--
112
2.34.1
113
114
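The mips conversion above has the same shape as the loongarch64 and aarch64 ones; a condensed sketch of the recurring pattern (the function and branch bodies are placeholders, only s->addr_type and the TCG_TYPE_* constants come from the series):

#include "tcg/tcg.h"   /* QEMU-internal header providing TCGContext */

/* What used to be a compile-time test on TARGET_LONG_BITS becomes a
 * run-time test on the per-context address type, so one backend build
 * can serve both 32-bit and 64-bit guests. */
static void emit_guest_access_sketch(TCGContext *s)
{
    if (s->addr_type == TCG_TYPE_I32) {
        /* 32-bit guest address: zero-extend, 32-bit shifts/compares */
    } else {
        /* TCG_TYPE_I64: full-width address arithmetic */
    }
}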
1
There is no need to go through cc->tcg_ops when
1
All uses replaced with TCGContext.addr_type.
2
we know what value that must have.
3
2
4
Reviewed-by: Michael Rolnik <mrolnik@gmail.com>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
target/avr/helper.c | 5 ++---
6
tcg/tcg.c | 27 ++++++++++++++-------------
9
1 file changed, 2 insertions(+), 3 deletions(-)
7
1 file changed, 14 insertions(+), 13 deletions(-)
10
8
11
diff --git a/target/avr/helper.c b/target/avr/helper.c
9
diff --git a/tcg/tcg.c b/tcg/tcg.c
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/target/avr/helper.c
11
--- a/tcg/tcg.c
14
+++ b/target/avr/helper.c
12
+++ b/tcg/tcg.c
15
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
16
bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
14
next_arg = 1;
17
{
15
18
bool ret = false;
16
loc = &info->in[next_arg];
19
- CPUClass *cc = CPU_GET_CLASS(cs);
17
- if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
20
AVRCPU *cpu = AVR_CPU(cs);
18
- nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
21
CPUAVRState *env = &cpu->env;
19
- ldst->addrlo_reg, ldst->addrhi_reg);
22
20
- tcg_out_helper_load_slots(s, nmov, mov, parm);
23
if (interrupt_request & CPU_INTERRUPT_RESET) {
21
- next_arg += nmov;
24
if (cpu_interrupts_enabled(env)) {
22
- } else {
25
cs->exception_index = EXCP_RESET;
23
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
26
- cc->tcg_ops->do_interrupt(cs);
24
/*
27
+ avr_cpu_do_interrupt(cs);
25
* 32-bit host with 32-bit guest: zero-extend the guest address
28
26
* to 64-bits for the helper by storing the low part, then
29
cs->interrupt_request &= ~CPU_INTERRUPT_RESET;
27
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
30
28
tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
31
@@ -XXX,XX +XXX,XX @@ bool avr_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
29
TCG_TYPE_I32, 0, parm);
32
if (cpu_interrupts_enabled(env) && env->intsrc != 0) {
30
next_arg += 2;
33
int index = ctz32(env->intsrc);
31
+ } else {
34
cs->exception_index = EXCP_INT(index);
32
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
35
- cc->tcg_ops->do_interrupt(cs);
33
+ ldst->addrlo_reg, ldst->addrhi_reg);
36
+ avr_cpu_do_interrupt(cs);
34
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
37
35
+ next_arg += nmov;
38
env->intsrc &= env->intsrc - 1; /* clear the interrupt */
36
}
39
if (!env->intsrc) {
37
38
switch (info->out_kind) {
39
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
40
41
/* Handle addr argument. */
42
loc = &info->in[next_arg];
43
- if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
44
- n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
45
- ldst->addrlo_reg, ldst->addrhi_reg);
46
- next_arg += n;
47
- nmov += n;
48
- } else {
49
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
50
/*
51
* 32-bit host with 32-bit guest: zero-extend the guest address
52
* to 64-bits for the helper by storing the low part. Later,
53
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
54
ldst->addrlo_reg, -1);
55
next_arg += 2;
56
nmov += 1;
57
+ } else {
58
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
59
+ ldst->addrlo_reg, ldst->addrhi_reg);
60
+ next_arg += n;
61
+ nmov += n;
62
}
63
64
/* Handle data argument. */
65
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
66
g_assert_not_reached();
67
}
68
69
- if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32) {
70
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
71
+ /* Zero extend the address by loading a zero for the high part. */
72
loc = &info->in[1 + !HOST_BIG_ENDIAN];
73
tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
74
}
40
--
75
--
41
2.34.1
76
2.34.1
42
77
43
78
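
As a footnote to the tcg_out_{ld,st}_helper_args changes above: the load/store helpers take the guest address as a single 64-bit argument, so a 32-bit guest address has to be widened, either implicitly by the move on a 64-bit host or, on a 32-bit host, by passing an explicit zero for the high half. A minimal standalone sketch of that widening, with invented names rather than the QEMU API:

    #include <stdint.h>
    #include <stdbool.h>

    /* Hypothetical illustration of building the 64-bit helper argument from
     * the guest address register(s).  addr_is_32bit corresponds to
     * s->addr_type == TCG_TYPE_I32 in the patch. */
    static uint64_t helper_addr_arg(bool addr_is_32bit,
                                    uint32_t addr_lo, uint32_t addr_hi)
    {
        if (addr_is_32bit) {
            return (uint64_t)addr_lo;               /* high half is zero */
        }
        return ((uint64_t)addr_hi << 32) | addr_lo; /* full 64-bit address */
    }
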
1
Cache the translation from guest to host address, so we may
1
Disconnect guest page size from TCG compilation.
2
use direct loads when we hit on the primary translation page.
2
While this could be done via exec/target_page.h, we want to cache
3
the value across multiple memory access operations, so we might
4
as well initialize this early.
3
5
4
Look up the second translation page only once, during translation.
6
The changes within tcg/ are entirely mechanical:
5
This obviates another lookup of the second page within tb_gen_code
6
after translation.
7
7
8
Fixes a bug: plugin_insn_append should be passed the bytes
8
sed -i s/TARGET_PAGE_BITS/s->page_bits/g
9
in the original memory order, not bswapped by pieces.
9
sed -i s/TARGET_PAGE_MASK/s->page_mask/g
10
10
11
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
11
Reviewed-by: Anton Johansson <anjo@rev.ng>
12
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
---
13
---
15
include/exec/translator.h | 63 +++++++++++--------
14
include/tcg/tcg.h | 5 +++++
16
accel/tcg/translate-all.c | 23 +++----
15
accel/tcg/translate-all.c | 4 ++++
17
accel/tcg/translator.c | 126 +++++++++++++++++++++++++++++---------
16
tcg/aarch64/tcg-target.c.inc | 6 +++---
18
3 files changed, 141 insertions(+), 71 deletions(-)
17
tcg/arm/tcg-target.c.inc | 10 +++++-----
18
tcg/i386/tcg-target.c.inc | 6 +++---
19
tcg/loongarch64/tcg-target.c.inc | 4 ++--
20
tcg/mips/tcg-target.c.inc | 6 +++---
21
tcg/ppc/tcg-target.c.inc | 14 +++++++-------
22
tcg/riscv/tcg-target.c.inc | 4 ++--
23
tcg/s390x/tcg-target.c.inc | 4 ++--
24
tcg/sparc64/tcg-target.c.inc | 4 ++--
25
11 files changed, 38 insertions(+), 29 deletions(-)
19
26
20
diff --git a/include/exec/translator.h b/include/exec/translator.h
27
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
21
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
22
--- a/include/exec/translator.h
29
--- a/include/tcg/tcg.h
23
+++ b/include/exec/translator.h
30
+++ b/include/tcg/tcg.h
24
@@ -XXX,XX +XXX,XX @@ typedef enum DisasJumpType {
31
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
25
* Architecture-agnostic disassembly context.
32
int nb_ops;
26
*/
33
TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */
27
typedef struct DisasContextBase {
34
28
- const TranslationBlock *tb;
35
+#ifdef CONFIG_SOFTMMU
29
+ TranslationBlock *tb;
36
+ int page_mask;
30
target_ulong pc_first;
37
+ uint8_t page_bits;
31
target_ulong pc_next;
38
+#endif
32
DisasJumpType is_jmp;
33
int num_insns;
34
int max_insns;
35
bool singlestep_enabled;
36
-#ifdef CONFIG_USER_ONLY
37
- /*
38
- * Guest address of the last byte of the last protected page.
39
- *
40
- * Pages containing the translated instructions are made non-writable in
41
- * order to achieve consistency in case another thread is modifying the
42
- * code while translate_insn() fetches the instruction bytes piecemeal.
43
- * Such writer threads are blocked on mmap_lock() in page_unprotect().
44
- */
45
- target_ulong page_protect_end;
46
-#endif
47
+ void *host_addr[2];
48
} DisasContextBase;
49
50
/**
51
@@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest);
52
* the relevant information at translation time.
53
*/
54
55
-#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \
56
- type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \
57
- abi_ptr pc, bool do_swap); \
58
- static inline type fullname(CPUArchState *env, \
59
- DisasContextBase *dcbase, abi_ptr pc) \
60
- { \
61
- return fullname ## _swap(env, dcbase, pc, false); \
62
+uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
63
+uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
64
+uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
65
+uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc);
66
+
39
+
67
+static inline uint16_t
40
TCGRegSet reserved_regs;
68
+translator_lduw_swap(CPUArchState *env, DisasContextBase *db,
41
intptr_t current_frame_offset;
69
+ abi_ptr pc, bool do_swap)
42
intptr_t frame_start;
70
+{
71
+ uint16_t ret = translator_lduw(env, db, pc);
72
+ if (do_swap) {
73
+ ret = bswap16(ret);
74
}
75
+ return ret;
76
+}
77
78
-#define FOR_EACH_TRANSLATOR_LD(F) \
79
- F(translator_ldub, uint8_t, cpu_ldub_code, /* no swap */) \
80
- F(translator_lduw, uint16_t, cpu_lduw_code, bswap16) \
81
- F(translator_ldl, uint32_t, cpu_ldl_code, bswap32) \
82
- F(translator_ldq, uint64_t, cpu_ldq_code, bswap64)
83
+static inline uint32_t
84
+translator_ldl_swap(CPUArchState *env, DisasContextBase *db,
85
+ abi_ptr pc, bool do_swap)
86
+{
87
+ uint32_t ret = translator_ldl(env, db, pc);
88
+ if (do_swap) {
89
+ ret = bswap32(ret);
90
+ }
91
+ return ret;
92
+}
93
94
-FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
95
-
96
-#undef GEN_TRANSLATOR_LD
97
+static inline uint64_t
98
+translator_ldq_swap(CPUArchState *env, DisasContextBase *db,
99
+ abi_ptr pc, bool do_swap)
100
+{
101
+ uint64_t ret = translator_ldq(env, db, pc);
102
+ if (do_swap) {
103
+ ret = bswap64(ret);
104
+ }
105
+ return ret;
106
+}
107
108
/*
109
* Return whether addr is on the same page as where disassembly started.
110
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
43
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
111
index XXXXXXX..XXXXXXX 100644
44
index XXXXXXX..XXXXXXX 100644
112
--- a/accel/tcg/translate-all.c
45
--- a/accel/tcg/translate-all.c
113
+++ b/accel/tcg/translate-all.c
46
+++ b/accel/tcg/translate-all.c
114
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
47
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
115
{
48
tb_set_page_addr1(tb, -1);
116
CPUArchState *env = cpu->env_ptr;
49
tcg_ctx->gen_tb = tb;
117
TranslationBlock *tb, *existing_tb;
50
tcg_ctx->addr_type = TCG_TYPE_TL;
118
- tb_page_addr_t phys_pc, phys_page2;
51
+#ifdef CONFIG_SOFTMMU
119
- target_ulong virt_page2;
52
+ tcg_ctx->page_bits = TARGET_PAGE_BITS;
120
+ tb_page_addr_t phys_pc;
53
+ tcg_ctx->page_mask = TARGET_PAGE_MASK;
121
tcg_insn_unit *gen_code_buf;
54
+#endif
122
int gen_code_size, search_size, max_insns;
55
123
#ifdef CONFIG_PROFILER
124
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
125
tb->flags = flags;
126
tb->cflags = cflags;
127
tb->trace_vcpu_dstate = *cpu->trace_dstate;
128
+ tb->page_addr[0] = phys_pc;
129
+ tb->page_addr[1] = -1;
130
tcg_ctx->tb_cflags = cflags;
131
tb_overflow:
56
tb_overflow:
132
57
133
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
58
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
134
}
59
index XXXXXXX..XXXXXXX 100644
60
--- a/tcg/aarch64/tcg-target.c.inc
61
+++ b/tcg/aarch64/tcg-target.c.inc
62
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
63
ldst->oi = oi;
64
ldst->addrlo_reg = addr_reg;
65
66
- mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
67
+ mask_type = (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32
68
? TCG_TYPE_I64 : TCG_TYPE_I32);
69
70
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
71
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
72
/* Extract the TLB index from the address into X0. */
73
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
74
TCG_REG_X0, TCG_REG_X0, addr_reg,
75
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
76
+ s->page_bits - CPU_TLB_ENTRY_BITS);
77
78
/* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
79
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
80
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
81
TCG_REG_X3, addr_reg, s_mask - a_mask);
82
x3 = TCG_REG_X3;
83
}
84
- compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
85
+ compare_mask = (uint64_t)s->page_mask | a_mask;
86
87
/* Store the page mask part of the address into X3. */
88
tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
89
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
90
index XXXXXXX..XXXXXXX 100644
91
--- a/tcg/arm/tcg-target.c.inc
92
+++ b/tcg/arm/tcg-target.c.inc
93
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
94
95
/* Extract the tlb index from the address into R0. */
96
tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
97
- SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
98
+ SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS));
135
99
136
/*
100
/*
137
- * If the TB is not associated with a physical RAM page then
101
* Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
138
- * it must be a temporary one-insn TB, and we have nothing to do
102
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
139
- * except fill in the page_addr[] fields. Return early before
103
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
140
- * attempting to link to other TBs or add to the lookup table.
104
addrlo, s_mask - a_mask);
141
+ * If the TB is not associated with a physical RAM page then it must be
105
}
142
+ * a temporary one-insn TB, and we have nothing left to do. Return early
106
- if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
143
+ * before attempting to link to other TBs or add to the lookup table.
107
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
108
+ if (use_armv7_instructions && s->page_bits <= 16) {
109
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask));
110
tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
111
t_addr, TCG_REG_TMP, 0);
112
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
113
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
114
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
115
}
116
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
117
- SHIFT_IMM_LSR(TARGET_PAGE_BITS));
118
+ SHIFT_IMM_LSR(s->page_bits));
119
tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
120
0, TCG_REG_R2, TCG_REG_TMP,
121
- SHIFT_IMM_LSL(TARGET_PAGE_BITS));
122
+ SHIFT_IMM_LSL(s->page_bits));
123
}
124
125
if (s->addr_type != TCG_TYPE_I32) {
126
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
127
index XXXXXXX..XXXXXXX 100644
128
--- a/tcg/i386/tcg-target.c.inc
129
+++ b/tcg/i386/tcg-target.c.inc
130
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
131
trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
132
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
133
hrexw = P_REXW;
134
- if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
135
+ if (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32) {
136
tlbtype = TCG_TYPE_I64;
137
tlbrexw = P_REXW;
138
}
139
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
140
141
tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
142
tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
143
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
144
+ s->page_bits - CPU_TLB_ENTRY_BITS);
145
146
tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
147
TLB_MASK_TABLE_OFS(mem_index) +
148
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
149
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
150
addrlo, s_mask - a_mask);
151
}
152
- tlb_mask = TARGET_PAGE_MASK | a_mask;
153
+ tlb_mask = s->page_mask | a_mask;
154
tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
155
156
/* cmp 0(TCG_REG_L0), TCG_REG_L1 */
157
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
158
index XXXXXXX..XXXXXXX 100644
159
--- a/tcg/loongarch64/tcg-target.c.inc
160
+++ b/tcg/loongarch64/tcg-target.c.inc
161
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
162
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
163
164
tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
165
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
166
+ s->page_bits - CPU_TLB_ENTRY_BITS);
167
tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
168
tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
169
170
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
171
tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
172
}
173
tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
174
- a_bits, TARGET_PAGE_BITS - 1);
175
+ a_bits, s->page_bits - 1);
176
177
/* Compare masked address with the TLB entry. */
178
ldst->label_ptr[0] = s->code_ptr;
179
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
180
index XXXXXXX..XXXXXXX 100644
181
--- a/tcg/mips/tcg-target.c.inc
182
+++ b/tcg/mips/tcg-target.c.inc
183
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
184
/* Extract the TLB index from the address into TMP3. */
185
if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
186
tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
187
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
188
+ s->page_bits - CPU_TLB_ENTRY_BITS);
189
} else {
190
tcg_out_dsrl(s, TCG_TMP3, addrlo,
191
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
192
+ s->page_bits - CPU_TLB_ENTRY_BITS);
193
}
194
tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
195
196
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
197
* For unaligned accesses, compare against the end of the access to
198
* verify that it does not cross a page boundary.
144
*/
199
*/
145
- if (phys_pc == -1) {
200
- tcg_out_movi(s, addr_type, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
146
- tb->page_addr[0] = tb->page_addr[1] = -1;
201
+ tcg_out_movi(s, addr_type, TCG_TMP1, s->page_mask | a_mask);
147
+ if (tb->page_addr[0] == -1) {
202
if (a_mask < s_mask) {
148
return tb;
203
if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
149
}
204
tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
150
205
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
151
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
206
index XXXXXXX..XXXXXXX 100644
207
--- a/tcg/ppc/tcg-target.c.inc
208
+++ b/tcg/ppc/tcg-target.c.inc
209
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
210
/* Extract the page index, shifted into place for tlb index. */
211
if (TCG_TARGET_REG_BITS == 32) {
212
tcg_out_shri32(s, TCG_REG_R0, addrlo,
213
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
214
+ s->page_bits - CPU_TLB_ENTRY_BITS);
215
} else {
216
tcg_out_shri64(s, TCG_REG_R0, addrlo,
217
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
218
+ s->page_bits - CPU_TLB_ENTRY_BITS);
219
}
220
tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
221
222
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
223
a_bits = s_bits;
224
}
225
tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
226
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
227
+ (32 - a_bits) & 31, 31 - s->page_bits);
228
} else {
229
TCGReg t = addrlo;
230
231
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
232
/* Mask the address for the requested alignment. */
233
if (TARGET_LONG_BITS == 32) {
234
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
235
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
236
+ (32 - a_bits) & 31, 31 - s->page_bits);
237
} else if (a_bits == 0) {
238
- tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
239
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
240
} else {
241
tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
242
- 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
243
- tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
244
+ 64 - s->page_bits, s->page_bits - a_bits);
245
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
246
}
247
}
248
249
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
250
index XXXXXXX..XXXXXXX 100644
251
--- a/tcg/riscv/tcg-target.c.inc
252
+++ b/tcg/riscv/tcg-target.c.inc
253
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
254
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
255
256
tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
257
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
258
+ s->page_bits - CPU_TLB_ENTRY_BITS);
259
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
260
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
261
262
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
263
tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
264
addr_adj, addr_reg, s_mask - a_mask);
265
}
266
- compare_mask = TARGET_PAGE_MASK | a_mask;
267
+ compare_mask = s->page_mask | a_mask;
268
if (compare_mask == sextreg(compare_mask, 0, 12)) {
269
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
270
} else {
271
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
272
index XXXXXXX..XXXXXXX 100644
273
--- a/tcg/s390x/tcg-target.c.inc
274
+++ b/tcg/s390x/tcg-target.c.inc
275
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
276
ldst->addrlo_reg = addr_reg;
277
278
tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
279
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
280
+ s->page_bits - CPU_TLB_ENTRY_BITS);
281
282
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
283
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
284
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
285
* cross pages using the address of the last byte of the access.
152
*/
286
*/
153
tcg_tb_insert(tb);
287
a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
154
288
- tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
155
- /* check next page if needed */
289
+ tlb_mask = (uint64_t)s->page_mask | a_mask;
156
- virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
290
if (a_off == 0) {
157
- phys_page2 = -1;
291
tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
158
- if ((pc & TARGET_PAGE_MASK) != virt_page2) {
292
} else {
159
- phys_page2 = get_page_addr_code(env, virt_page2);
293
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
160
- }
294
index XXXXXXX..XXXXXXX 100644
161
/*
295
--- a/tcg/sparc64/tcg-target.c.inc
162
* No explicit memory barrier is required -- tb_link_page() makes the
296
+++ b/tcg/sparc64/tcg-target.c.inc
163
* TB visible in a consistent state.
297
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
164
*/
298
165
- existing_tb = tb_link_page(tb, phys_pc, phys_page2);
299
/* Extract the page index, shifted into place for tlb index. */
166
+ existing_tb = tb_link_page(tb, tb->page_addr[0], tb->page_addr[1]);
300
tcg_out_arithi(s, TCG_REG_T1, addr_reg,
167
/* if the TB already exists, discard what we just translated */
301
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
168
if (unlikely(existing_tb != tb)) {
302
+ s->page_bits - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
169
uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
303
tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T2, ARITH_AND);
170
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
304
171
index XXXXXXX..XXXXXXX 100644
305
/* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */
172
--- a/accel/tcg/translator.c
306
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
173
+++ b/accel/tcg/translator.c
307
h->base = TCG_REG_T1;
174
@@ -XXX,XX +XXX,XX @@ bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
308
175
return ((db->pc_first ^ dest) & TARGET_PAGE_MASK) == 0;
309
/* Mask out the page offset, except for the required alignment. */
176
}
310
- compare_mask = TARGET_PAGE_MASK | a_mask;
177
311
+ compare_mask = s->page_mask | a_mask;
178
-static inline void translator_page_protect(DisasContextBase *dcbase,
312
if (check_fit_tl(compare_mask, 13)) {
179
- target_ulong pc)
313
tcg_out_arithi(s, TCG_REG_T3, addr_reg, compare_mask, ARITH_AND);
180
-{
314
} else {
181
-#ifdef CONFIG_USER_ONLY
182
- dcbase->page_protect_end = pc | ~TARGET_PAGE_MASK;
183
- page_protect(pc);
184
-#endif
185
-}
186
-
187
void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
188
target_ulong pc, void *host_pc,
189
const TranslatorOps *ops, DisasContextBase *db)
190
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
191
db->num_insns = 0;
192
db->max_insns = max_insns;
193
db->singlestep_enabled = cflags & CF_SINGLE_STEP;
194
- translator_page_protect(db, db->pc_next);
195
+ db->host_addr[0] = host_pc;
196
+ db->host_addr[1] = NULL;
197
+
198
+#ifdef CONFIG_USER_ONLY
199
+ page_protect(pc);
200
+#endif
201
202
ops->init_disas_context(db, cpu);
203
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
204
@@ -XXX,XX +XXX,XX @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
205
#endif
206
}
207
208
-static inline void translator_maybe_page_protect(DisasContextBase *dcbase,
209
- target_ulong pc, size_t len)
210
+static void *translator_access(CPUArchState *env, DisasContextBase *db,
211
+ target_ulong pc, size_t len)
212
{
213
-#ifdef CONFIG_USER_ONLY
214
- target_ulong end = pc + len - 1;
215
+ void *host;
216
+ target_ulong base, end;
217
+ TranslationBlock *tb;
218
219
- if (end > dcbase->page_protect_end) {
220
- translator_page_protect(dcbase, end);
221
+ tb = db->tb;
222
+
223
+ /* Use slow path if first page is MMIO. */
224
+ if (unlikely(tb->page_addr[0] == -1)) {
225
+ return NULL;
226
}
227
+
228
+ end = pc + len - 1;
229
+ if (likely(is_same_page(db, end))) {
230
+ host = db->host_addr[0];
231
+ base = db->pc_first;
232
+ } else {
233
+ host = db->host_addr[1];
234
+ base = TARGET_PAGE_ALIGN(db->pc_first);
235
+ if (host == NULL) {
236
+ tb->page_addr[1] =
237
+ get_page_addr_code_hostp(env, base, &db->host_addr[1]);
238
+#ifdef CONFIG_USER_ONLY
239
+ page_protect(end);
240
#endif
241
+ /* We cannot handle MMIO as second page. */
242
+ assert(tb->page_addr[1] != -1);
243
+ host = db->host_addr[1];
244
+ }
245
+
246
+ /* Use slow path when crossing pages. */
247
+ if (is_same_page(db, pc)) {
248
+ return NULL;
249
+ }
250
+ }
251
+
252
+ tcg_debug_assert(pc >= base);
253
+ return host + (pc - base);
254
}
255
256
-#define GEN_TRANSLATOR_LD(fullname, type, load_fn, swap_fn) \
257
- type fullname ## _swap(CPUArchState *env, DisasContextBase *dcbase, \
258
- abi_ptr pc, bool do_swap) \
259
- { \
260
- translator_maybe_page_protect(dcbase, pc, sizeof(type)); \
261
- type ret = load_fn(env, pc); \
262
- if (do_swap) { \
263
- ret = swap_fn(ret); \
264
- } \
265
- plugin_insn_append(pc, &ret, sizeof(ret)); \
266
- return ret; \
267
+uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
268
+{
269
+ uint8_t ret;
270
+ void *p = translator_access(env, db, pc, sizeof(ret));
271
+
272
+ if (p) {
273
+ plugin_insn_append(pc, p, sizeof(ret));
274
+ return ldub_p(p);
275
}
276
+ ret = cpu_ldub_code(env, pc);
277
+ plugin_insn_append(pc, &ret, sizeof(ret));
278
+ return ret;
279
+}
280
281
-FOR_EACH_TRANSLATOR_LD(GEN_TRANSLATOR_LD)
282
+uint16_t translator_lduw(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
283
+{
284
+ uint16_t ret, plug;
285
+ void *p = translator_access(env, db, pc, sizeof(ret));
286
287
-#undef GEN_TRANSLATOR_LD
288
+ if (p) {
289
+ plugin_insn_append(pc, p, sizeof(ret));
290
+ return lduw_p(p);
291
+ }
292
+ ret = cpu_lduw_code(env, pc);
293
+ plug = tswap16(ret);
294
+ plugin_insn_append(pc, &plug, sizeof(ret));
295
+ return ret;
296
+}
297
+
298
+uint32_t translator_ldl(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
299
+{
300
+ uint32_t ret, plug;
301
+ void *p = translator_access(env, db, pc, sizeof(ret));
302
+
303
+ if (p) {
304
+ plugin_insn_append(pc, p, sizeof(ret));
305
+ return ldl_p(p);
306
+ }
307
+ ret = cpu_ldl_code(env, pc);
308
+ plug = tswap32(ret);
309
+ plugin_insn_append(pc, &plug, sizeof(ret));
310
+ return ret;
311
+}
312
+
313
+uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
314
+{
315
+ uint64_t ret, plug;
316
+ void *p = translator_access(env, db, pc, sizeof(ret));
317
+
318
+ if (p) {
319
+ plugin_insn_append(pc, p, sizeof(ret));
320
+ return ldq_p(p);
321
+ }
322
+ ret = cpu_ldq_code(env, pc);
323
+ plug = tswap64(ret);
324
+ plugin_insn_append(pc, &plug, sizeof(ret));
325
+ return ret;
326
+}
327
--
315
--
328
2.34.1
316
2.34.1
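
The translator_access logic above boils down to a two-entry cache: a host pointer for the page where translation started and, filled in lazily, one for the following page, with the slow path reserved for MMIO and for accesses that straddle the page boundary. A minimal standalone sketch of that idea, using an invented struct and a fixed 4 KiB page size purely for illustration:

    #include <stdint.h>
    #include <stddef.h>

    #define PAGE_SIZE 4096u                       /* illustration only */
    #define PAGE_MASK (~(uint64_t)(PAGE_SIZE - 1))

    struct insn_cache {
        uint64_t pc_first;       /* guest pc where translation started */
        void *host_addr[2];      /* host pointers for first/second page */
    };

    /* Return a direct host pointer for reading len bytes at guest pc, or
     * NULL to request the slow path.  Assumes pc >= pc_first and that the
     * access stays within the first two pages of the translation block. */
    static void *cached_host_ptr(const struct insn_cache *c,
                                 uint64_t pc, size_t len)
    {
        uint64_t end = pc + len - 1;
        uint64_t first_page = c->pc_first & PAGE_MASK;

        if ((end & PAGE_MASK) == first_page) {
            /* Entirely within the primary page: direct host access. */
            return (char *)c->host_addr[0] + (pc - c->pc_first);
        }
        if ((pc & PAGE_MASK) == first_page || c->host_addr[1] == NULL) {
            /* The access crosses the page boundary, or the second page has
             * not been cached yet; the real code would look it up here,
             * this sketch simply punts to the slow path. */
            return NULL;
        }
        /* Entirely within the second page. */
        return (char *)c->host_addr[1] + (pc - (first_page + PAGE_SIZE));
    }
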
1
Pass these along to translator_loop -- pc may be used instead
1
Disconnect guest tlb parameters from TCG compilation.
2
of tb->pc, and host_pc is currently unused. Adjust all targets
3
at one time.
4
2
5
Acked-by: Alistair Francis <alistair.francis@wdc.com>
3
Reviewed-by: Anton Johansson <anjo@rev.ng>
6
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
7
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
5
---
10
include/exec/exec-all.h | 1 -
6
include/tcg/tcg.h | 1 +
11
include/exec/translator.h | 24 ++++++++++++++++++++----
7
accel/tcg/translate-all.c | 1 +
12
accel/tcg/translate-all.c | 6 ++++--
8
tcg/aarch64/tcg-target.c.inc | 2 +-
13
accel/tcg/translator.c | 9 +++++----
9
tcg/i386/tcg-target.c.inc | 2 +-
14
target/alpha/translate.c | 5 +++--
10
4 files changed, 4 insertions(+), 2 deletions(-)
15
target/arm/translate.c | 5 +++--
16
target/avr/translate.c | 5 +++--
17
target/cris/translate.c | 5 +++--
18
target/hexagon/translate.c | 6 ++++--
19
target/hppa/translate.c | 5 +++--
20
target/i386/tcg/translate.c | 5 +++--
21
target/loongarch/translate.c | 6 ++++--
22
target/m68k/translate.c | 5 +++--
23
target/microblaze/translate.c | 5 +++--
24
target/mips/tcg/translate.c | 5 +++--
25
target/nios2/translate.c | 5 +++--
26
target/openrisc/translate.c | 6 ++++--
27
target/ppc/translate.c | 5 +++--
28
target/riscv/translate.c | 5 +++--
29
target/rx/translate.c | 5 +++--
30
target/s390x/tcg/translate.c | 5 +++--
31
target/sh4/translate.c | 5 +++--
32
target/sparc/translate.c | 5 +++--
33
target/tricore/translate.c | 6 ++++--
34
target/xtensa/translate.c | 6 ++++--
35
25 files changed, 97 insertions(+), 53 deletions(-)
36
11
37
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
12
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
38
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
39
--- a/include/exec/exec-all.h
14
--- a/include/tcg/tcg.h
40
+++ b/include/exec/exec-all.h
15
+++ b/include/tcg/tcg.h
41
@@ -XXX,XX +XXX,XX @@ typedef ram_addr_t tb_page_addr_t;
16
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
42
#define TB_PAGE_ADDR_FMT RAM_ADDR_FMT
17
#ifdef CONFIG_SOFTMMU
18
int page_mask;
19
uint8_t page_bits;
20
+ uint8_t tlb_dyn_max_bits;
43
#endif
21
#endif
44
22
45
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns);
23
TCGRegSet reserved_regs;
46
void restore_state_to_opc(CPUArchState *env, TranslationBlock *tb,
47
target_ulong *data);
48
49
diff --git a/include/exec/translator.h b/include/exec/translator.h
50
index XXXXXXX..XXXXXXX 100644
51
--- a/include/exec/translator.h
52
+++ b/include/exec/translator.h
53
@@ -XXX,XX +XXX,XX @@
54
#include "exec/translate-all.h"
55
#include "tcg/tcg.h"
56
57
+/**
58
+ * gen_intermediate_code
59
+ * @cpu: cpu context
60
+ * @tb: translation block
61
+ * @max_insns: max number of instructions to translate
62
+ * @pc: guest virtual program counter address
63
+ * @host_pc: host physical program counter address
64
+ *
65
+ * This function must be provided by the target, which should create
66
+ * the target-specific DisasContext, and then invoke translator_loop.
67
+ */
68
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
69
+ target_ulong pc, void *host_pc);
70
71
/**
72
* DisasJumpType:
73
@@ -XXX,XX +XXX,XX @@ typedef struct TranslatorOps {
74
75
/**
76
* translator_loop:
77
- * @ops: Target-specific operations.
78
- * @db: Disassembly context.
79
* @cpu: Target vCPU.
80
* @tb: Translation block.
81
* @max_insns: Maximum number of insns to translate.
82
+ * @pc: guest virtual program counter address
83
+ * @host_pc: host physical program counter address
84
+ * @ops: Target-specific operations.
85
+ * @db: Disassembly context.
86
*
87
* Generic translator loop.
88
*
89
@@ -XXX,XX +XXX,XX @@ typedef struct TranslatorOps {
90
* - When single-stepping is enabled (system-wide or on the current vCPU).
91
* - When too many instructions have been translated.
92
*/
93
-void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
94
- CPUState *cpu, TranslationBlock *tb, int max_insns);
95
+void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
96
+ target_ulong pc, void *host_pc,
97
+ const TranslatorOps *ops, DisasContextBase *db);
98
99
void translator_loop_temp_check(DisasContextBase *db);
100
101
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
24
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
102
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
103
--- a/accel/tcg/translate-all.c
26
--- a/accel/tcg/translate-all.c
104
+++ b/accel/tcg/translate-all.c
27
+++ b/accel/tcg/translate-all.c
105
@@ -XXX,XX +XXX,XX @@
106
107
#include "exec/cputlb.h"
108
#include "exec/translate-all.h"
109
+#include "exec/translator.h"
110
#include "qemu/bitmap.h"
111
#include "qemu/qemu-print.h"
112
#include "qemu/timer.h"
113
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
28
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
114
TCGProfile *prof = &tcg_ctx->prof;
29
#ifdef CONFIG_SOFTMMU
115
int64_t ti;
30
tcg_ctx->page_bits = TARGET_PAGE_BITS;
31
tcg_ctx->page_mask = TARGET_PAGE_MASK;
32
+ tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
116
#endif
33
#endif
117
+ void *host_pc;
34
118
35
tb_overflow:
119
assert_memory_lock();
36
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
120
qemu_thread_jit_write();
121
122
- phys_pc = get_page_addr_code(env, pc);
123
+ phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
124
125
if (phys_pc == -1) {
126
/* Generate a one-shot TB with 1 insn in it */
127
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
128
tcg_func_start(tcg_ctx);
129
130
tcg_ctx->cpu = env_cpu(env);
131
- gen_intermediate_code(cpu, tb, max_insns);
132
+ gen_intermediate_code(cpu, tb, max_insns, pc, host_pc);
133
assert(tb->size != 0);
134
tcg_ctx->cpu = NULL;
135
max_insns = tb->icount;
136
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
137
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
138
--- a/accel/tcg/translator.c
38
--- a/tcg/aarch64/tcg-target.c.inc
139
+++ b/accel/tcg/translator.c
39
+++ b/tcg/aarch64/tcg-target.c.inc
140
@@ -XXX,XX +XXX,XX @@ static inline void translator_page_protect(DisasContextBase *dcbase,
40
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
141
#endif
41
ldst->oi = oi;
142
}
42
ldst->addrlo_reg = addr_reg;
143
43
144
-void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
44
- mask_type = (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32
145
- CPUState *cpu, TranslationBlock *tb, int max_insns)
45
+ mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
146
+void translator_loop(CPUState *cpu, TranslationBlock *tb, int max_insns,
46
? TCG_TYPE_I64 : TCG_TYPE_I32);
147
+ target_ulong pc, void *host_pc,
47
148
+ const TranslatorOps *ops, DisasContextBase *db)
48
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
149
{
49
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
150
uint32_t cflags = tb_cflags(tb);
151
bool plugin_enabled;
152
153
/* Initialize DisasContext */
154
db->tb = tb;
155
- db->pc_first = tb->pc;
156
- db->pc_next = db->pc_first;
157
+ db->pc_first = pc;
158
+ db->pc_next = pc;
159
db->is_jmp = DISAS_NEXT;
160
db->num_insns = 0;
161
db->max_insns = max_insns;
162
diff --git a/target/alpha/translate.c b/target/alpha/translate.c
163
index XXXXXXX..XXXXXXX 100644
50
index XXXXXXX..XXXXXXX 100644
164
--- a/target/alpha/translate.c
51
--- a/tcg/i386/tcg-target.c.inc
165
+++ b/target/alpha/translate.c
52
+++ b/tcg/i386/tcg-target.c.inc
166
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps alpha_tr_ops = {
53
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
167
.disas_log = alpha_tr_disas_log,
54
trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
168
};
55
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
169
56
hrexw = P_REXW;
170
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
57
- if (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32) {
171
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
58
+ if (s->page_bits + s->tlb_dyn_max_bits > 32) {
172
+ target_ulong pc, void *host_pc)
59
tlbtype = TCG_TYPE_I64;
173
{
60
tlbrexw = P_REXW;
174
DisasContext dc;
61
}
175
- translator_loop(&alpha_tr_ops, &dc.base, cpu, tb, max_insns);
176
+ translator_loop(cpu, tb, max_insns, pc, host_pc, &alpha_tr_ops, &dc.base);
177
}
178
179
void restore_state_to_opc(CPUAlphaState *env, TranslationBlock *tb,
180
diff --git a/target/arm/translate.c b/target/arm/translate.c
181
index XXXXXXX..XXXXXXX 100644
182
--- a/target/arm/translate.c
183
+++ b/target/arm/translate.c
184
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps thumb_translator_ops = {
185
};
186
187
/* generate intermediate code for basic block 'tb'. */
188
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
189
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
190
+ target_ulong pc, void *host_pc)
191
{
192
DisasContext dc = { };
193
const TranslatorOps *ops = &arm_translator_ops;
194
@@ -XXX,XX +XXX,XX @@ void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
195
}
196
#endif
197
198
- translator_loop(ops, &dc.base, cpu, tb, max_insns);
199
+ translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
200
}
201
202
void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
203
diff --git a/target/avr/translate.c b/target/avr/translate.c
204
index XXXXXXX..XXXXXXX 100644
205
--- a/target/avr/translate.c
206
+++ b/target/avr/translate.c
207
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps avr_tr_ops = {
208
.disas_log = avr_tr_disas_log,
209
};
210
211
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
212
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
213
+ target_ulong pc, void *host_pc)
214
{
215
DisasContext dc = { };
216
- translator_loop(&avr_tr_ops, &dc.base, cs, tb, max_insns);
217
+ translator_loop(cs, tb, max_insns, pc, host_pc, &avr_tr_ops, &dc.base);
218
}
219
220
void restore_state_to_opc(CPUAVRState *env, TranslationBlock *tb,
221
diff --git a/target/cris/translate.c b/target/cris/translate.c
222
index XXXXXXX..XXXXXXX 100644
223
--- a/target/cris/translate.c
224
+++ b/target/cris/translate.c
225
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps cris_tr_ops = {
226
.disas_log = cris_tr_disas_log,
227
};
228
229
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
230
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
231
+ target_ulong pc, void *host_pc)
232
{
233
DisasContext dc;
234
- translator_loop(&cris_tr_ops, &dc.base, cs, tb, max_insns);
235
+ translator_loop(cs, tb, max_insns, pc, host_pc, &cris_tr_ops, &dc.base);
236
}
237
238
void cris_cpu_dump_state(CPUState *cs, FILE *f, int flags)
239
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
240
index XXXXXXX..XXXXXXX 100644
241
--- a/target/hexagon/translate.c
242
+++ b/target/hexagon/translate.c
243
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hexagon_tr_ops = {
244
.disas_log = hexagon_tr_disas_log,
245
};
246
247
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
248
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
249
+ target_ulong pc, void *host_pc)
250
{
251
DisasContext ctx;
252
253
- translator_loop(&hexagon_tr_ops, &ctx.base, cs, tb, max_insns);
254
+ translator_loop(cs, tb, max_insns, pc, host_pc,
255
+ &hexagon_tr_ops, &ctx.base);
256
}
257
258
#define NAME_LEN 64
259
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
260
index XXXXXXX..XXXXXXX 100644
261
--- a/target/hppa/translate.c
262
+++ b/target/hppa/translate.c
263
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps hppa_tr_ops = {
264
.disas_log = hppa_tr_disas_log,
265
};
266
267
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
268
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
269
+ target_ulong pc, void *host_pc)
270
{
271
DisasContext ctx;
272
- translator_loop(&hppa_tr_ops, &ctx.base, cs, tb, max_insns);
273
+ translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base);
274
}
275
276
void restore_state_to_opc(CPUHPPAState *env, TranslationBlock *tb,
277
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
278
index XXXXXXX..XXXXXXX 100644
279
--- a/target/i386/tcg/translate.c
280
+++ b/target/i386/tcg/translate.c
281
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps i386_tr_ops = {
282
};
283
284
/* generate intermediate code for basic block 'tb'. */
285
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
286
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
287
+ target_ulong pc, void *host_pc)
288
{
289
DisasContext dc;
290
291
- translator_loop(&i386_tr_ops, &dc.base, cpu, tb, max_insns);
292
+ translator_loop(cpu, tb, max_insns, pc, host_pc, &i386_tr_ops, &dc.base);
293
}
294
295
void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
296
diff --git a/target/loongarch/translate.c b/target/loongarch/translate.c
297
index XXXXXXX..XXXXXXX 100644
298
--- a/target/loongarch/translate.c
299
+++ b/target/loongarch/translate.c
300
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps loongarch_tr_ops = {
301
.disas_log = loongarch_tr_disas_log,
302
};
303
304
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
305
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
306
+ target_ulong pc, void *host_pc)
307
{
308
DisasContext ctx;
309
310
- translator_loop(&loongarch_tr_ops, &ctx.base, cs, tb, max_insns);
311
+ translator_loop(cs, tb, max_insns, pc, host_pc,
312
+ &loongarch_tr_ops, &ctx.base);
313
}
314
315
void loongarch_translate_init(void)
316
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
317
index XXXXXXX..XXXXXXX 100644
318
--- a/target/m68k/translate.c
319
+++ b/target/m68k/translate.c
320
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps m68k_tr_ops = {
321
.disas_log = m68k_tr_disas_log,
322
};
323
324
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
325
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
326
+ target_ulong pc, void *host_pc)
327
{
328
DisasContext dc;
329
- translator_loop(&m68k_tr_ops, &dc.base, cpu, tb, max_insns);
330
+ translator_loop(cpu, tb, max_insns, pc, host_pc, &m68k_tr_ops, &dc.base);
331
}
332
333
static double floatx80_to_double(CPUM68KState *env, uint16_t high, uint64_t low)
334
diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c
335
index XXXXXXX..XXXXXXX 100644
336
--- a/target/microblaze/translate.c
337
+++ b/target/microblaze/translate.c
338
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mb_tr_ops = {
339
.disas_log = mb_tr_disas_log,
340
};
341
342
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
343
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
344
+ target_ulong pc, void *host_pc)
345
{
346
DisasContext dc;
347
- translator_loop(&mb_tr_ops, &dc.base, cpu, tb, max_insns);
348
+ translator_loop(cpu, tb, max_insns, pc, host_pc, &mb_tr_ops, &dc.base);
349
}
350
351
void mb_cpu_dump_state(CPUState *cs, FILE *f, int flags)
352
diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c
353
index XXXXXXX..XXXXXXX 100644
354
--- a/target/mips/tcg/translate.c
355
+++ b/target/mips/tcg/translate.c
356
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps mips_tr_ops = {
357
.disas_log = mips_tr_disas_log,
358
};
359
360
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
361
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
362
+ target_ulong pc, void *host_pc)
363
{
364
DisasContext ctx;
365
366
- translator_loop(&mips_tr_ops, &ctx.base, cs, tb, max_insns);
367
+ translator_loop(cs, tb, max_insns, pc, host_pc, &mips_tr_ops, &ctx.base);
368
}
369
370
void mips_tcg_init(void)
371
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
372
index XXXXXXX..XXXXXXX 100644
373
--- a/target/nios2/translate.c
374
+++ b/target/nios2/translate.c
375
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps nios2_tr_ops = {
376
.disas_log = nios2_tr_disas_log,
377
};
378
379
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
380
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
381
+ target_ulong pc, void *host_pc)
382
{
383
DisasContext dc;
384
- translator_loop(&nios2_tr_ops, &dc.base, cs, tb, max_insns);
385
+ translator_loop(cs, tb, max_insns, pc, host_pc, &nios2_tr_ops, &dc.base);
386
}
387
388
void nios2_cpu_dump_state(CPUState *cs, FILE *f, int flags)
389
diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c
390
index XXXXXXX..XXXXXXX 100644
391
--- a/target/openrisc/translate.c
392
+++ b/target/openrisc/translate.c
393
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps openrisc_tr_ops = {
394
.disas_log = openrisc_tr_disas_log,
395
};
396
397
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
398
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
399
+ target_ulong pc, void *host_pc)
400
{
401
DisasContext ctx;
402
403
- translator_loop(&openrisc_tr_ops, &ctx.base, cs, tb, max_insns);
404
+ translator_loop(cs, tb, max_insns, pc, host_pc,
405
+ &openrisc_tr_ops, &ctx.base);
406
}
407
408
void openrisc_cpu_dump_state(CPUState *cs, FILE *f, int flags)
409
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
410
index XXXXXXX..XXXXXXX 100644
411
--- a/target/ppc/translate.c
412
+++ b/target/ppc/translate.c
413
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps ppc_tr_ops = {
414
.disas_log = ppc_tr_disas_log,
415
};
416
417
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
418
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
419
+ target_ulong pc, void *host_pc)
420
{
421
DisasContext ctx;
422
423
- translator_loop(&ppc_tr_ops, &ctx.base, cs, tb, max_insns);
424
+ translator_loop(cs, tb, max_insns, pc, host_pc, &ppc_tr_ops, &ctx.base);
425
}
426
427
void restore_state_to_opc(CPUPPCState *env, TranslationBlock *tb,
428
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
429
index XXXXXXX..XXXXXXX 100644
430
--- a/target/riscv/translate.c
431
+++ b/target/riscv/translate.c
432
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps riscv_tr_ops = {
433
.disas_log = riscv_tr_disas_log,
434
};
435
436
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
437
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
438
+ target_ulong pc, void *host_pc)
439
{
440
DisasContext ctx;
441
442
- translator_loop(&riscv_tr_ops, &ctx.base, cs, tb, max_insns);
443
+ translator_loop(cs, tb, max_insns, pc, host_pc, &riscv_tr_ops, &ctx.base);
444
}
445
446
void riscv_translate_init(void)
447
diff --git a/target/rx/translate.c b/target/rx/translate.c
448
index XXXXXXX..XXXXXXX 100644
449
--- a/target/rx/translate.c
450
+++ b/target/rx/translate.c
451
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps rx_tr_ops = {
452
.disas_log = rx_tr_disas_log,
453
};
454
455
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
456
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
457
+ target_ulong pc, void *host_pc)
458
{
459
DisasContext dc;
460
461
- translator_loop(&rx_tr_ops, &dc.base, cs, tb, max_insns);
462
+ translator_loop(cs, tb, max_insns, pc, host_pc, &rx_tr_ops, &dc.base);
463
}
464
465
void restore_state_to_opc(CPURXState *env, TranslationBlock *tb,
466
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
467
index XXXXXXX..XXXXXXX 100644
468
--- a/target/s390x/tcg/translate.c
469
+++ b/target/s390x/tcg/translate.c
470
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps s390x_tr_ops = {
471
.disas_log = s390x_tr_disas_log,
472
};
473
474
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
475
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
476
+ target_ulong pc, void *host_pc)
477
{
478
DisasContext dc;
479
480
- translator_loop(&s390x_tr_ops, &dc.base, cs, tb, max_insns);
481
+ translator_loop(cs, tb, max_insns, pc, host_pc, &s390x_tr_ops, &dc.base);
482
}
483
484
void restore_state_to_opc(CPUS390XState *env, TranslationBlock *tb,
485
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
486
index XXXXXXX..XXXXXXX 100644
487
--- a/target/sh4/translate.c
488
+++ b/target/sh4/translate.c
489
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sh4_tr_ops = {
490
.disas_log = sh4_tr_disas_log,
491
};
492
493
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
494
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
495
+ target_ulong pc, void *host_pc)
496
{
497
DisasContext ctx;
498
499
- translator_loop(&sh4_tr_ops, &ctx.base, cs, tb, max_insns);
500
+ translator_loop(cs, tb, max_insns, pc, host_pc, &sh4_tr_ops, &ctx.base);
501
}
502
503
void restore_state_to_opc(CPUSH4State *env, TranslationBlock *tb,
504
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
505
index XXXXXXX..XXXXXXX 100644
506
--- a/target/sparc/translate.c
507
+++ b/target/sparc/translate.c
508
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps sparc_tr_ops = {
509
.disas_log = sparc_tr_disas_log,
510
};
511
512
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
513
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
514
+ target_ulong pc, void *host_pc)
515
{
516
DisasContext dc = {};
517
518
- translator_loop(&sparc_tr_ops, &dc.base, cs, tb, max_insns);
519
+ translator_loop(cs, tb, max_insns, pc, host_pc, &sparc_tr_ops, &dc.base);
520
}
521
522
void sparc_tcg_init(void)
523
diff --git a/target/tricore/translate.c b/target/tricore/translate.c
524
index XXXXXXX..XXXXXXX 100644
525
--- a/target/tricore/translate.c
526
+++ b/target/tricore/translate.c
527
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps tricore_tr_ops = {
528
};
529
530
531
-void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
532
+void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns,
533
+ target_ulong pc, void *host_pc)
534
{
535
DisasContext ctx;
536
- translator_loop(&tricore_tr_ops, &ctx.base, cs, tb, max_insns);
537
+ translator_loop(cs, tb, max_insns, pc, host_pc,
538
+ &tricore_tr_ops, &ctx.base);
539
}
540
541
void
542
diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c
543
index XXXXXXX..XXXXXXX 100644
544
--- a/target/xtensa/translate.c
545
+++ b/target/xtensa/translate.c
546
@@ -XXX,XX +XXX,XX @@ static const TranslatorOps xtensa_translator_ops = {
547
.disas_log = xtensa_tr_disas_log,
548
};
549
550
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
551
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns,
552
+ target_ulong pc, void *host_pc)
553
{
554
DisasContext dc = {};
555
- translator_loop(&xtensa_translator_ops, &dc.base, cpu, tb, max_insns);
556
+ translator_loop(cpu, tb, max_insns, pc, host_pc,
557
+ &xtensa_translator_ops, &dc.base);
558
}
559
560
void xtensa_cpu_dump_state(CPUState *cs, FILE *f, int flags)
561
--
62
--
562
2.34.1
63
2.34.1
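
For a concrete picture of what page_bits, page_mask and tlb_dyn_max_bits parameterize, here is a minimal standalone sketch of the softmmu fast-path checks they feed; the struct and function names are invented for illustration and do not match the QEMU definitions:

    #include <stdint.h>
    #include <stdbool.h>

    struct tlb_params {
        int page_bits;              /* log2 of the guest page size */
        uint64_t page_mask;         /* ~(((uint64_t)1 << page_bits) - 1) */
    };

    /* Pick the TLB entry for addr in a table of (1 << tlb_bits) entries. */
    static unsigned tlb_index(const struct tlb_params *p, int tlb_bits,
                              uint64_t addr)
    {
        return (addr >> p->page_bits) & ((1u << tlb_bits) - 1);
    }

    /* Fast-path hit test: the cached tag is the page-aligned guest address,
     * so masking with (page_mask | alignment_mask) rejects both a wrong
     * page and an insufficiently aligned access in a single compare. */
    static bool tlb_hit(const struct tlb_params *p, uint64_t cached_tag,
                        uint64_t addr, uint64_t align_mask)
    {
        return (addr & (p->page_mask | align_mask)) == cached_tag;
    }

Keeping these values as run-time fields of TCGContext, rather than compile-time TARGET_* constants, is what lets a single TCG build serve guests with different page and TLB sizes, which is the point of the two "Disconnect ..." patches above.
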
1
It was non-obvious to me why we can raise an exception in
1
TCG will need this declaration, without all of the other
2
the middle of a comparison function, but it works.
2
bits that come with cpu-all.h.
3
While nearby, use TARGET_PAGE_ALIGN instead of open-coding.
4
3
5
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
4
Reviewed-by: Thomas Huth <thuth@redhat.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
accel/tcg/cpu-exec.c | 11 ++++++++++-
7
include/exec/cpu-all.h | 5 +----
9
1 file changed, 10 insertions(+), 1 deletion(-)
8
include/exec/user/guest-base.h | 12 ++++++++++++
9
tcg/tcg.c | 3 +++
10
3 files changed, 16 insertions(+), 4 deletions(-)
11
create mode 100644 include/exec/user/guest-base.h
10
12
11
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
13
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
12
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cpu-exec.c
15
--- a/include/exec/cpu-all.h
14
+++ b/accel/tcg/cpu-exec.c
16
+++ b/include/exec/cpu-all.h
15
@@ -XXX,XX +XXX,XX @@ static bool tb_lookup_cmp(const void *p, const void *d)
17
@@ -XXX,XX +XXX,XX @@
16
tb_page_addr_t phys_page2;
18
17
target_ulong virt_page2;
19
#if defined(CONFIG_USER_ONLY)
18
20
#include "exec/user/abitypes.h"
19
- virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
21
+#include "exec/user/guest-base.h"
20
+ /*
22
21
+ * We know that the first page matched, and an otherwise valid TB
23
-/* On some host systems the guest address space is reserved on the host.
22
+ * encountered an incomplete instruction at the end of that page,
24
- * This allows the guest address space to be offset to a convenient location.
23
+ * therefore we know that generating a new TB from the current PC
25
- */
24
+ * must also require reading from the next page -- even if the
26
-extern uintptr_t guest_base;
25
+ * second pages do not match, and therefore the resulting insn
27
extern bool have_guest_base;
26
+ * is different for the new TB. Therefore any exception raised
28
27
+ * here by the faulting lookup is not premature.
29
/*
28
+ */
30
diff --git a/include/exec/user/guest-base.h b/include/exec/user/guest-base.h
29
+ virt_page2 = TARGET_PAGE_ALIGN(desc->pc);
31
new file mode 100644
30
phys_page2 = get_page_addr_code(desc->env, virt_page2);
32
index XXXXXXX..XXXXXXX
31
if (tb->page_addr[1] == phys_page2) {
33
--- /dev/null
32
return true;
34
+++ b/include/exec/user/guest-base.h
35
@@ -XXX,XX +XXX,XX @@
36
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
37
+/*
38
+ * Declaration of guest_base.
39
+ * Copyright (c) 2003 Fabrice Bellard
40
+ */
41
+
42
+#ifndef EXEC_USER_GUEST_BASE_H
43
+#define EXEC_USER_GUEST_BASE_H
44
+
45
+extern uintptr_t guest_base;
46
+
47
+#endif
48
diff --git a/tcg/tcg.c b/tcg/tcg.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/tcg/tcg.c
51
+++ b/tcg/tcg.c
52
@@ -XXX,XX +XXX,XX @@
53
#include "tcg/tcg-temp-internal.h"
54
#include "tcg-internal.h"
55
#include "accel/tcg/perf.h"
56
+#ifdef CONFIG_USER_ONLY
57
+#include "exec/user/guest-base.h"
58
+#endif
59
60
/* Forward declarations for functions declared in tcg-target.c.inc and
61
used here. */
33
--
62
--
34
2.34.1
63
2.34.1
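
For context on why tcg/tcg.c wants the guest_base declaration at all: in user-only mode QEMU places the guest address space at a fixed offset inside the host address space, and the backends fold that offset into every generated guest memory access. A one-line sketch of the concept, with a hypothetical helper name rather than the QEMU API:

    #include <stdint.h>

    extern uintptr_t guest_base;    /* host offset of the guest address space */

    /* Hypothetical user-only guest-to-host address conversion. */
    static inline void *guest_to_host(uint64_t guest_addr)
    {
        return (void *)(uintptr_t)(guest_base + guest_addr);
    }
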