The following changes since commit 8844bb8d896595ee1d25d21c770e6e6f29803097:

  Merge tag 'or1k-pull-request-20230513' of https://github.com/stffrdhrn/qemu into staging (2023-05-13 11:23:14 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230516

for you to fetch changes up to ee95d036bf4bfa10be65325a287bf3d0e8b2a0e6:

  tcg: Split out exec/user/guest-base.h (2023-05-16 08:11:53 -0700)

----------------------------------------------------------------
tcg/i386: Fix tcg_out_addi_ptr for win64
tcg: Implement atomicity for TCGv_i128
tcg: First quarter of cleanups for building tcg once

----------------------------------------------------------------
Richard Henderson (80):
      tcg/i386: Set P_REXW in tcg_out_addi_ptr
      include/exec/memop: Add MO_ATOM_*
      accel/tcg: Honor atomicity of loads
      accel/tcg: Honor atomicity of stores
      tcg: Unify helper_{be,le}_{ld,st}*
      accel/tcg: Implement helper_{ld,st}*_mmu for user-only
      tcg/tci: Use helper_{ld,st}*_mmu for user-only
      tcg: Add 128-bit guest memory primitives
      meson: Detect atomic128 support with optimization
      tcg/i386: Add have_atomic16
      tcg/aarch64: Detect have_lse, have_lse2 for linux
      tcg/aarch64: Detect have_lse, have_lse2 for darwin
      tcg/i386: Use full load/store helpers in user-only mode
      tcg/aarch64: Use full load/store helpers in user-only mode
      tcg/ppc: Use full load/store helpers in user-only mode
      tcg/loongarch64: Use full load/store helpers in user-only mode
      tcg/riscv: Use full load/store helpers in user-only mode
      tcg/arm: Adjust constraints on qemu_ld/st
      tcg/arm: Use full load/store helpers in user-only mode
      tcg/mips: Use full load/store helpers in user-only mode
      tcg/s390x: Use full load/store helpers in user-only mode
      tcg/sparc64: Allocate %g2 as a third temporary
      tcg/sparc64: Rename tcg_out_movi_imm13 to tcg_out_movi_s13
      target/sparc64: Remove tcg_out_movi_s13 case from tcg_out_movi_imm32
      tcg/sparc64: Rename tcg_out_movi_imm32 to tcg_out_movi_u32
      tcg/sparc64: Split out tcg_out_movi_s32
      tcg/sparc64: Use standard slow path for softmmu
      accel/tcg: Remove helper_unaligned_{ld,st}
      tcg/loongarch64: Check the host supports unaligned accesses
      tcg/loongarch64: Support softmmu unaligned accesses
      tcg/riscv: Support softmmu unaligned accesses
      tcg: Introduce tcg_target_has_memory_bswap
      tcg: Add INDEX_op_qemu_{ld,st}_i128
      tcg: Introduce tcg_out_movext3
      tcg: Merge tcg_out_helper_load_regs into caller
      tcg: Support TCG_TYPE_I128 in tcg_out_{ld,st}_helper_{args,ret}
      tcg: Introduce atom_and_align_for_opc
      tcg/i386: Use atom_and_align_for_opc
      tcg/aarch64: Use atom_and_align_for_opc
      tcg/arm: Use atom_and_align_for_opc
      tcg/loongarch64: Use atom_and_align_for_opc
      tcg/mips: Use atom_and_align_for_opc
      tcg/ppc: Use atom_and_align_for_opc
      tcg/riscv: Use atom_and_align_for_opc
      tcg/s390x: Use atom_and_align_for_opc
      tcg/sparc64: Use atom_and_align_for_opc
      tcg/i386: Honor 64-bit atomicity in 32-bit mode
      tcg/i386: Support 128-bit load/store with have_atomic16
      tcg/aarch64: Rename temporaries
      tcg/aarch64: Support 128-bit load/store
      tcg/ppc: Support 128-bit load/store
      tcg/s390x: Support 128-bit load/store
      tcg: Split out memory ops to tcg-op-ldst.c
      tcg: Widen gen_insn_data to uint64_t
      accel/tcg: Widen tcg-ldst.h addresses to uint64_t
      tcg: Widen helper_{ld,st}_i128 addresses to uint64_t
      tcg: Widen helper_atomic_* addresses to uint64_t
      tcg: Widen tcg_gen_code pc_start argument to uint64_t
      accel/tcg: Merge gen_mem_wrapped with plugin_gen_empty_mem_callback
      accel/tcg: Merge do_gen_mem_cb into caller
      tcg: Reduce copies for plugin_gen_mem_callbacks
      accel/tcg: Widen plugin_gen_empty_mem_callback to i64
      tcg: Add addr_type to TCGContext
      tcg: Remove TCGv from tcg_gen_qemu_{ld,st}_*
      tcg: Remove TCGv from tcg_gen_atomic_*
      tcg: Split INDEX_op_qemu_{ld,st}* for guest address size
      tcg/tci: Elimnate TARGET_LONG_BITS, target_ulong
      tcg/i386: Always enable TCG_TARGET_HAS_extr[lh]_i64_i32
      tcg/i386: Conditionalize tcg_out_extu_i32_i64
      tcg/i386: Adjust type of tlb_mask
      tcg/i386: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/arm: Remove TARGET_LONG_BITS
      tcg/aarch64: Remove USE_GUEST_BASE
      tcg/aarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/loongarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/mips: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg: Add page_bits and page_mask to TCGContext
      tcg: Add tlb_dyn_max_bits to TCGContext
      tcg: Split out exec/user/guest-base.h

 docs/devel/loads-stores.rst | 36 +-
 docs/devel/tcg-ops.rst | 11 +-
 meson.build | 52 +-
 accel/tcg/tcg-runtime.h | 49 +-
 include/exec/cpu-all.h | 5 +-
 include/exec/memop.h | 37 ++
 include/exec/plugin-gen.h | 4 +-
 include/exec/user/guest-base.h | 12 +
 include/qemu/cpuid.h | 18 +
 include/tcg/tcg-ldst.h | 72 +--
 include/tcg/tcg-op.h | 273 ++++++---
 include/tcg/tcg-opc.h | 41 +-
 include/tcg/tcg.h | 39 +-
 tcg/aarch64/tcg-target-con-set.h | 2 +
 tcg/aarch64/tcg-target.h | 15 +-
 tcg/arm/tcg-target-con-set.h | 16 +-
 tcg/arm/tcg-target-con-str.h | 5 +-
 tcg/arm/tcg-target.h | 3 +-
 tcg/i386/tcg-target.h | 13 +-
 tcg/loongarch64/tcg-target.h | 3 +-
 tcg/mips/tcg-target.h | 4 +-
 tcg/ppc/tcg-target-con-set.h | 2 +
 tcg/ppc/tcg-target-con-str.h | 1 +
 tcg/ppc/tcg-target.h | 4 +-
 tcg/riscv/tcg-target.h | 4 +-
 tcg/s390x/tcg-target-con-set.h | 2 +
 tcg/s390x/tcg-target.h | 4 +-
 tcg/sparc64/tcg-target-con-set.h | 2 -
 tcg/sparc64/tcg-target-con-str.h | 1 -
 tcg/sparc64/tcg-target.h | 4 +-
 tcg/tcg-internal.h | 2 +
 tcg/tci/tcg-target.h | 4 +-
 accel/tcg/cputlb.c | 839 ++++++++++++++++---------
 accel/tcg/plugin-gen.c | 68 +-
 accel/tcg/translate-all.c | 35 +-
 accel/tcg/user-exec.c | 488 ++++++++++-----
 tcg/optimize.c | 19 +-
 tcg/tcg-op-ldst.c | 1234 +++++++++++++++++++++++++++++++++++++
 tcg/tcg-op.c | 864 --------------------------
 tcg/tcg.c | 627 +++++++++++++++----
 tcg/tci.c | 243 +++-----
 accel/tcg/atomic_common.c.inc | 14 +-
 accel/tcg/ldst_atomicity.c.inc | 1262 ++++++++++++++++++++++++++++++++++++++
 tcg/aarch64/tcg-target.c.inc | 438 ++++++++-----
 tcg/arm/tcg-target.c.inc | 246 +++-----
 tcg/i386/tcg-target.c.inc | 467 ++++++++----
 tcg/loongarch64/tcg-target.c.inc | 123 ++--
 tcg/mips/tcg-target.c.inc | 216 +++----
 tcg/ppc/tcg-target.c.inc | 300 +++++----
 tcg/riscv/tcg-target.c.inc | 161 ++---
 tcg/s390x/tcg-target.c.inc | 207 ++++---
 tcg/sparc64/tcg-target.c.inc | 731 ++++++++--------
 tcg/tci/tcg-target.c.inc | 58 +-
 tcg/meson.build | 1 +
 54 files changed, 5988 insertions(+), 3393 deletions(-)
 create mode 100644 include/exec/user/guest-base.h
 create mode 100644 tcg/tcg-op-ldst.c
 create mode 100644 accel/tcg/ldst_atomicity.c.inc
The REXW bit must be set to produce a 64-bit pointer result; the
bit is disabled in 32-bit mode, so we can do this unconditionally.

Fixes: 7d9e1ee424b0 ("tcg/i386: Adjust assert in tcg_out_addi_ptr")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1592
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1642
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

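For context (not part of the patch): on x86-64 a 32-bit operation zero-extends
its result into the full 64-bit register, so emitting LEA without REX.W
truncates the computed pointer. In C terms the difference is roughly the
following sketch (the function names are made up for illustration):

    #include <stdint.h>

    /* LEA with REX.W: full 64-bit address arithmetic. */
    static inline uint64_t lea_with_rexw(uint64_t rs, int32_t imm)
    {
        return rs + (int64_t)imm;
    }

    /* LEA without REX.W: the 32-bit result is zero-extended,
     * discarding the upper half of the pointer. */
    static inline uint64_t lea_without_rexw(uint64_t rs, int32_t imm)
    {
        return (uint32_t)(rs + imm);
    }
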
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
 {
     /* This function is only used for passing structs by reference. */
     tcg_debug_assert(imm == (int32_t)imm);
-    tcg_out_modrm_offset(s, OPC_LEA, rd, rs, imm);
+    tcg_out_modrm_offset(s, OPC_LEA | P_REXW, rd, rs, imm);
 }
 
 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
-- 
2.34.1
This field may be used to describe the precise atomicity requirements
of the guest, which may then be used to constrain the methods by which
it may be emulated by the host.

For instance, the AArch64 LDP (32-bit) instruction changes semantics
with ARMv8.4 LSE2, from

   MO_64 | MO_ATOM_IFALIGN_PAIR
   (64-bits, single-copy atomic only on 4 byte units,
    nonatomic if not aligned by 4),

to

   MO_64 | MO_ATOM_WITHIN16
   (64-bits, single-copy atomic within a 16 byte block)

The former may be implemented with two 4 byte loads, or a single 8 byte
load if that happens to be efficient on the host.  The latter may not
be implemented with two 4 byte loads and may also require a helper when
misaligned.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/memop.h | 37 +++++++++++++++++++++++++++++++++++++
 tcg/tcg.c | 27 +++++++++++++++++++++------
 2 files changed, 58 insertions(+), 6 deletions(-)

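As a rough illustration of how a front end might consume these flags (this
sketch is not part of the patch; the helper and the lse2 field below are
hypothetical), the atomicity requirement is simply OR'ed into the MemOp that
is passed to the usual load/store generators:

    /* Sketch: select LDP (32-bit) atomicity based on whether FEAT_LSE2
     * is enabled for the current translation. */
    static void gen_ldp32_example(DisasContext *s, TCGv_i64 dest,
                                  TCGv addr, int mem_idx)
    {
        MemOp mop = MO_LEUQ;

        mop |= s->lse2 ? MO_ATOM_WITHIN16 : MO_ATOM_IFALIGN_PAIR;
        tcg_gen_qemu_ld_i64(dest, addr, mem_idx, mop);
        /* The two 32-bit halves are then extracted from dest. */
    }
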
diff --git a/include/exec/memop.h b/include/exec/memop.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/include/exec/memop.h
32
+++ b/include/exec/memop.h
33
@@ -XXX,XX +XXX,XX @@ typedef enum MemOp {
34
MO_ALIGN_64 = 6 << MO_ASHIFT,
35
MO_ALIGN = MO_AMASK,
36
37
+ /*
38
+ * MO_ATOM_* describes the atomicity requirements of the operation:
39
+ * MO_ATOM_IFALIGN: the operation must be single-copy atomic if it
40
+ * is aligned; if unaligned there is no atomicity.
41
+ * MO_ATOM_IFALIGN_PAIR: the entire operation may be considered to
42
+ * be a pair of half-sized operations which are packed together
43
+ * for convenience, with single-copy atomicity on each half if
44
+ * the half is aligned.
45
+ * This is the atomicity e.g. of Arm pre-FEAT_LSE2 LDP.
46
+ * MO_ATOM_WITHIN16: the operation is single-copy atomic, even if it
47
+ * is unaligned, so long as it does not cross a 16-byte boundary;
48
+ * if it crosses a 16-byte boundary there is no atomicity.
49
+ * This is the atomicity e.g. of Arm FEAT_LSE2 LDR.
50
+ * MO_ATOM_WITHIN16_PAIR: the entire operation is single-copy atomic,
51
+ * if it happens to be within a 16-byte boundary, otherwise it
52
+ * devolves to a pair of half-sized MO_ATOM_WITHIN16 operations.
53
+ * Depending on alignment, one or both will be single-copy atomic.
54
+ * This is the atomicity e.g. of Arm FEAT_LSE2 LDP.
55
+ * MO_ATOM_SUBALIGN: the operation is single-copy atomic by parts
56
+ * by the alignment. E.g. if the address is 0 mod 4, then each
57
+ * 4-byte subobject is single-copy atomic.
58
+ * This is the atomicity e.g. of IBM Power.
59
+ * MO_ATOM_NONE: the operation has no atomicity requirements.
60
+ *
61
+ * Note the default (i.e. 0) value is single-copy atomic to the
62
+ * size of the operation, if aligned. This retains the behaviour
63
+ * from before this field was introduced.
64
+ */
65
+ MO_ATOM_SHIFT = 8,
66
+ MO_ATOM_IFALIGN = 0 << MO_ATOM_SHIFT,
67
+ MO_ATOM_IFALIGN_PAIR = 1 << MO_ATOM_SHIFT,
68
+ MO_ATOM_WITHIN16 = 2 << MO_ATOM_SHIFT,
69
+ MO_ATOM_WITHIN16_PAIR = 3 << MO_ATOM_SHIFT,
70
+ MO_ATOM_SUBALIGN = 4 << MO_ATOM_SHIFT,
71
+ MO_ATOM_NONE = 5 << MO_ATOM_SHIFT,
72
+ MO_ATOM_MASK = 7 << MO_ATOM_SHIFT,
73
+
74
/* Combinations of the above, for ease of use. */
75
MO_UB = MO_8,
76
MO_UW = MO_16,
77
diff --git a/tcg/tcg.c b/tcg/tcg.c
78
index XXXXXXX..XXXXXXX 100644
79
--- a/tcg/tcg.c
80
+++ b/tcg/tcg.c
81
@@ -XXX,XX +XXX,XX @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
82
[MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
83
};
84
85
+static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
86
+ [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
87
+ [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
88
+ [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
89
+ [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
90
+ [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
91
+ [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
92
+};
93
+
94
static const char bswap_flag_name[][6] = {
95
[TCG_BSWAP_IZ] = "iz",
96
[TCG_BSWAP_OZ] = "oz",
97
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
98
case INDEX_op_qemu_ld_i64:
99
case INDEX_op_qemu_st_i64:
100
{
101
+ const char *s_al, *s_op, *s_at;
102
MemOpIdx oi = op->args[k++];
103
MemOp op = get_memop(oi);
104
unsigned ix = get_mmuidx(oi);
105
106
- if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
107
- col += ne_fprintf(f, ",$0x%x,%u", op, ix);
108
+ s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
109
+ s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
110
+ s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
111
+ op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
112
+
113
+ /* If all fields are accounted for, print symbolically. */
114
+ if (!op && s_al && s_op && s_at) {
115
+ col += ne_fprintf(f, ",%s%s%s,%u",
116
+ s_at, s_al, s_op, ix);
117
} else {
118
- const char *s_al, *s_op;
119
- s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
120
- s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
121
- col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
122
+ op = get_memop(oi);
123
+ col += ne_fprintf(f, ",$0x%x,%u", op, ix);
124
}
125
i = 1;
126
}
127
--
128
2.34.1
Create ldst_atomicity.c.inc.

Not required for user-only code loads, because we've ensured that
the page is read-only before beginning to translate code.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 175 +++++++---
 accel/tcg/user-exec.c | 26 +-
 accel/tcg/ldst_atomicity.c.inc | 566 +++++++++++++++++++++++++++++++++
 3 files changed, 716 insertions(+), 51 deletions(-)
 create mode 100644 accel/tcg/ldst_atomicity.c.inc

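The central trick used by several of the new helpers is to perform one wider
aligned atomic load and then extract the bytes that were actually requested.
A simplified standalone sketch of that idea follows (assumes a little-endian
host with an atomic 8-byte load; the real code below also handles big-endian
hosts and falls back to cpu_loop_exit_atomic when the host cannot do this):

    #include <stdint.h>

    /* Load @size (1, 2 or 4) bytes at @p, where the access does not cross
     * an 8-byte boundary, using a single aligned atomic 8-byte load. */
    static uint32_t load_extract_al8(void *p, int size)
    {
        uintptr_t pi = (uintptr_t)p;
        uint64_t *a = (uint64_t *)(pi & ~(uintptr_t)7);
        uint64_t v = __atomic_load_n(a, __ATOMIC_RELAXED);

        v >>= (pi & 7) * 8;                       /* little-endian extract */
        return size < 4 ? v & ((1u << (size * 8)) - 1) : (uint32_t)v;
    }
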
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/accel/tcg/cputlb.c
18
+++ b/accel/tcg/cputlb.c
19
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
20
return qemu_ram_addr_from_host_nofail(p);
21
}
22
23
+/* Load/store with atomicity primitives. */
24
+#include "ldst_atomicity.c.inc"
25
+
26
#ifdef CONFIG_PLUGIN
27
/*
28
* Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
29
@@ -XXX,XX +XXX,XX @@ static void validate_memop(MemOpIdx oi, MemOp expected)
30
* specifically for reading instructions from system memory. It is
31
* called by the translation loop and in some helpers where the code
32
* is disassembled. It shouldn't be called directly by guest code.
33
- */
34
-
35
-typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
36
- MemOpIdx oi, uintptr_t retaddr);
37
-
38
-static inline uint64_t QEMU_ALWAYS_INLINE
39
-load_memop(const void *haddr, MemOp op)
40
-{
41
- switch (op) {
42
- case MO_UB:
43
- return ldub_p(haddr);
44
- case MO_BEUW:
45
- return lduw_be_p(haddr);
46
- case MO_LEUW:
47
- return lduw_le_p(haddr);
48
- case MO_BEUL:
49
- return (uint32_t)ldl_be_p(haddr);
50
- case MO_LEUL:
51
- return (uint32_t)ldl_le_p(haddr);
52
- case MO_BEUQ:
53
- return ldq_be_p(haddr);
54
- case MO_LEUQ:
55
- return ldq_le_p(haddr);
56
- default:
57
- qemu_build_not_reached();
58
- }
59
-}
60
-
61
-/*
62
+ *
63
* For the benefit of TCG generated code, we want to avoid the
64
* complication of ABI-specific return type promotion and always
65
* return a value extended to the register size of the host. This is
66
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
67
return ret_be;
68
}
69
70
+/**
71
+ * do_ld_parts_beN
72
+ * @p: translation parameters
73
+ * @ret_be: accumulated data
74
+ *
75
+ * As do_ld_bytes_beN, but atomically on each aligned part.
76
+ */
77
+static uint64_t do_ld_parts_beN(MMULookupPageData *p, uint64_t ret_be)
78
+{
79
+ void *haddr = p->haddr;
80
+ int size = p->size;
81
+
82
+ do {
83
+ uint64_t x;
84
+ int n;
85
+
86
+ /*
87
+ * Find minimum of alignment and size.
88
+ * This is slightly stronger than required by MO_ATOM_SUBALIGN, which
89
+ * would have only checked the low bits of addr|size once at the start,
90
+ * but is just as easy.
91
+ */
92
+ switch (((uintptr_t)haddr | size) & 7) {
93
+ case 4:
94
+ x = cpu_to_be32(load_atomic4(haddr));
95
+ ret_be = (ret_be << 32) | x;
96
+ n = 4;
97
+ break;
98
+ case 2:
99
+ case 6:
100
+ x = cpu_to_be16(load_atomic2(haddr));
101
+ ret_be = (ret_be << 16) | x;
102
+ n = 2;
103
+ break;
104
+ default:
105
+ x = *(uint8_t *)haddr;
106
+ ret_be = (ret_be << 8) | x;
107
+ n = 1;
108
+ break;
109
+ case 0:
110
+ g_assert_not_reached();
111
+ }
112
+ haddr += n;
113
+ size -= n;
114
+ } while (size != 0);
115
+ return ret_be;
116
+}
117
+
118
+/**
119
+ * do_ld_parts_be4
120
+ * @p: translation parameters
121
+ * @ret_be: accumulated data
122
+ *
123
+ * As do_ld_bytes_beN, but with one atomic load.
124
+ * Four aligned bytes are guaranteed to cover the load.
125
+ */
126
+static uint64_t do_ld_whole_be4(MMULookupPageData *p, uint64_t ret_be)
127
+{
128
+ int o = p->addr & 3;
129
+ uint32_t x = load_atomic4(p->haddr - o);
130
+
131
+ x = cpu_to_be32(x);
132
+ x <<= o * 8;
133
+ x >>= (4 - p->size) * 8;
134
+ return (ret_be << (p->size * 8)) | x;
135
+}
136
+
137
+/**
138
+ * do_ld_parts_be8
139
+ * @p: translation parameters
140
+ * @ret_be: accumulated data
141
+ *
142
+ * As do_ld_bytes_beN, but with one atomic load.
143
+ * Eight aligned bytes are guaranteed to cover the load.
144
+ */
145
+static uint64_t do_ld_whole_be8(CPUArchState *env, uintptr_t ra,
146
+ MMULookupPageData *p, uint64_t ret_be)
147
+{
148
+ int o = p->addr & 7;
149
+ uint64_t x = load_atomic8_or_exit(env, ra, p->haddr - o);
150
+
151
+ x = cpu_to_be64(x);
152
+ x <<= o * 8;
153
+ x >>= (8 - p->size) * 8;
154
+ return (ret_be << (p->size * 8)) | x;
155
+}
156
+
157
/*
158
* Wrapper for the above.
159
*/
160
static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
161
- uint64_t ret_be, int mmu_idx,
162
- MMUAccessType type, uintptr_t ra)
163
+ uint64_t ret_be, int mmu_idx, MMUAccessType type,
164
+ MemOp mop, uintptr_t ra)
165
{
166
+ MemOp atom;
167
+ unsigned tmp, half_size;
168
+
169
if (unlikely(p->flags & TLB_MMIO)) {
170
return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra);
171
- } else {
172
+ }
173
+
174
+ /*
175
+ * It is a given that we cross a page and therefore there is no
176
+ * atomicity for the load as a whole, but subobjects may need attention.
177
+ */
178
+ atom = mop & MO_ATOM_MASK;
179
+ switch (atom) {
180
+ case MO_ATOM_SUBALIGN:
181
+ return do_ld_parts_beN(p, ret_be);
182
+
183
+ case MO_ATOM_IFALIGN_PAIR:
184
+ case MO_ATOM_WITHIN16_PAIR:
185
+ tmp = mop & MO_SIZE;
186
+ tmp = tmp ? tmp - 1 : 0;
187
+ half_size = 1 << tmp;
188
+ if (atom == MO_ATOM_IFALIGN_PAIR
189
+ ? p->size == half_size
190
+ : p->size >= half_size) {
191
+ if (!HAVE_al8_fast && p->size < 4) {
192
+ return do_ld_whole_be4(p, ret_be);
193
+ } else {
194
+ return do_ld_whole_be8(env, ra, p, ret_be);
195
+ }
196
+ }
197
+ /* fall through */
198
+
199
+ case MO_ATOM_IFALIGN:
200
+ case MO_ATOM_WITHIN16:
201
+ case MO_ATOM_NONE:
202
return do_ld_bytes_beN(p, ret_be);
203
+
204
+ default:
205
+ g_assert_not_reached();
206
}
207
}
208
209
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
210
}
211
212
/* Perform the load host endian, then swap if necessary. */
213
- ret = load_memop(p->haddr, MO_UW);
214
+ ret = load_atom_2(env, ra, p->haddr, memop);
215
if (memop & MO_BSWAP) {
216
ret = bswap16(ret);
217
}
218
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
219
}
220
221
/* Perform the load host endian. */
222
- ret = load_memop(p->haddr, MO_UL);
223
+ ret = load_atom_4(env, ra, p->haddr, memop);
224
if (memop & MO_BSWAP) {
225
ret = bswap32(ret);
226
}
227
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
228
}
229
230
/* Perform the load host endian. */
231
- ret = load_memop(p->haddr, MO_UQ);
232
+ ret = load_atom_8(env, ra, p->haddr, memop);
233
if (memop & MO_BSWAP) {
234
ret = bswap64(ret);
235
}
236
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
237
return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
238
}
239
240
- ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
241
- ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
242
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
243
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
244
if ((l.memop & MO_BSWAP) == MO_LE) {
245
ret = bswap32(ret);
246
}
247
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
248
return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
249
}
250
251
- ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
252
- ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
253
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
254
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
255
if ((l.memop & MO_BSWAP) == MO_LE) {
256
ret = bswap64(ret);
257
}
258
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
259
index XXXXXXX..XXXXXXX 100644
260
--- a/accel/tcg/user-exec.c
261
+++ b/accel/tcg/user-exec.c
262
@@ -XXX,XX +XXX,XX @@ static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
263
return ret;
264
}
265
266
+#include "ldst_atomicity.c.inc"
267
+
268
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
269
MemOpIdx oi, uintptr_t ra)
270
{
271
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
272
273
validate_memop(oi, MO_BEUW);
274
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
275
- ret = lduw_be_p(haddr);
276
+ ret = load_atom_2(env, ra, haddr, get_memop(oi));
277
clear_helper_retaddr();
278
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
279
- return ret;
280
+ return cpu_to_be16(ret);
281
}
282
283
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
284
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
285
286
validate_memop(oi, MO_BEUL);
287
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
288
- ret = ldl_be_p(haddr);
289
+ ret = load_atom_4(env, ra, haddr, get_memop(oi));
290
clear_helper_retaddr();
291
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
292
- return ret;
293
+ return cpu_to_be32(ret);
294
}
295
296
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
297
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
298
299
validate_memop(oi, MO_BEUQ);
300
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
301
- ret = ldq_be_p(haddr);
302
+ ret = load_atom_8(env, ra, haddr, get_memop(oi));
303
clear_helper_retaddr();
304
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
305
- return ret;
306
+ return cpu_to_be64(ret);
307
}
308
309
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
310
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
311
312
validate_memop(oi, MO_LEUW);
313
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
314
- ret = lduw_le_p(haddr);
315
+ ret = load_atom_2(env, ra, haddr, get_memop(oi));
316
clear_helper_retaddr();
317
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
318
- return ret;
319
+ return cpu_to_le16(ret);
320
}
321
322
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
323
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
324
325
validate_memop(oi, MO_LEUL);
326
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
327
- ret = ldl_le_p(haddr);
328
+ ret = load_atom_4(env, ra, haddr, get_memop(oi));
329
clear_helper_retaddr();
330
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
331
- return ret;
332
+ return cpu_to_le32(ret);
333
}
334
335
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
336
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
337
338
validate_memop(oi, MO_LEUQ);
339
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
340
- ret = ldq_le_p(haddr);
341
+ ret = load_atom_8(env, ra, haddr, get_memop(oi));
342
clear_helper_retaddr();
343
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
344
- return ret;
345
+ return cpu_to_le64(ret);
346
}
347
348
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
349
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
350
new file mode 100644
351
index XXXXXXX..XXXXXXX
352
--- /dev/null
353
+++ b/accel/tcg/ldst_atomicity.c.inc
354
@@ -XXX,XX +XXX,XX @@
355
+/*
356
+ * Routines common to user and system emulation of load/store.
357
+ *
358
+ * Copyright (c) 2022 Linaro, Ltd.
359
+ *
360
+ * SPDX-License-Identifier: GPL-2.0-or-later
361
+ *
362
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
363
+ * See the COPYING file in the top-level directory.
364
+ */
365
+
366
+#ifdef CONFIG_ATOMIC64
367
+# define HAVE_al8 true
368
+#else
369
+# define HAVE_al8 false
370
+#endif
371
+#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
372
+
373
+#if defined(CONFIG_ATOMIC128)
374
+# define HAVE_al16_fast true
375
+#else
376
+# define HAVE_al16_fast false
377
+#endif
378
+
379
+/**
380
+ * required_atomicity:
381
+ *
382
+ * Return the lg2 bytes of atomicity required by @memop for @p.
383
+ * If the operation must be split into two operations to be
384
+ * examined separately for atomicity, return -lg2.
385
+ */
386
+static int required_atomicity(CPUArchState *env, uintptr_t p, MemOp memop)
387
+{
388
+ MemOp atom = memop & MO_ATOM_MASK;
389
+ MemOp size = memop & MO_SIZE;
390
+ MemOp half = size ? size - 1 : 0;
391
+ unsigned tmp;
392
+ int atmax;
393
+
394
+ switch (atom) {
395
+ case MO_ATOM_NONE:
396
+ atmax = MO_8;
397
+ break;
398
+
399
+ case MO_ATOM_IFALIGN_PAIR:
400
+ size = half;
401
+ /* fall through */
402
+
403
+ case MO_ATOM_IFALIGN:
404
+ tmp = (1 << size) - 1;
405
+ atmax = p & tmp ? MO_8 : size;
406
+ break;
407
+
408
+ case MO_ATOM_WITHIN16:
409
+ tmp = p & 15;
410
+ atmax = (tmp + (1 << size) <= 16 ? size : MO_8);
411
+ break;
412
+
413
+ case MO_ATOM_WITHIN16_PAIR:
414
+ tmp = p & 15;
415
+ if (tmp + (1 << size) <= 16) {
416
+ atmax = size;
417
+ } else if (tmp + (1 << half) == 16) {
418
+ /*
419
+ * The pair exactly straddles the boundary.
420
+ * Both halves are naturally aligned and atomic.
421
+ */
422
+ atmax = half;
423
+ } else {
424
+ /*
425
+ * One of the pair crosses the boundary, and is non-atomic.
426
+ * The other of the pair does not cross, and is atomic.
427
+ */
428
+ atmax = -half;
429
+ }
430
+ break;
431
+
432
+ case MO_ATOM_SUBALIGN:
433
+ /*
434
+ * Examine the alignment of p to determine if there are subobjects
435
+ * that must be aligned. Note that we only really need ctz4() --
436
+ * any more sigificant bits are discarded by the immediately
437
+ * following comparison.
438
+ */
439
+ tmp = ctz32(p);
440
+ atmax = MIN(size, tmp);
441
+ break;
442
+
443
+ default:
444
+ g_assert_not_reached();
445
+ }
446
+
447
+ /*
448
+ * Here we have the architectural atomicity of the operation.
449
+ * However, when executing in a serial context, we need no extra
450
+ * host atomicity in order to avoid racing. This reduction
451
+ * avoids looping with cpu_loop_exit_atomic.
452
+ */
453
+ if (cpu_in_serial_context(env_cpu(env))) {
454
+ return MO_8;
455
+ }
456
+ return atmax;
457
+}
458
+
459
+/**
460
+ * load_atomic2:
461
+ * @pv: host address
462
+ *
463
+ * Atomically load 2 aligned bytes from @pv.
464
+ */
465
+static inline uint16_t load_atomic2(void *pv)
466
+{
467
+ uint16_t *p = __builtin_assume_aligned(pv, 2);
468
+ return qatomic_read(p);
469
+}
470
+
471
+/**
472
+ * load_atomic4:
473
+ * @pv: host address
474
+ *
475
+ * Atomically load 4 aligned bytes from @pv.
476
+ */
477
+static inline uint32_t load_atomic4(void *pv)
478
+{
479
+ uint32_t *p = __builtin_assume_aligned(pv, 4);
480
+ return qatomic_read(p);
481
+}
482
+
483
+/**
484
+ * load_atomic8:
485
+ * @pv: host address
486
+ *
487
+ * Atomically load 8 aligned bytes from @pv.
488
+ */
489
+static inline uint64_t load_atomic8(void *pv)
490
+{
491
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
492
+
493
+ qemu_build_assert(HAVE_al8);
494
+ return qatomic_read__nocheck(p);
495
+}
496
+
497
+/**
498
+ * load_atomic16:
499
+ * @pv: host address
500
+ *
501
+ * Atomically load 16 aligned bytes from @pv.
502
+ */
503
+static inline Int128 load_atomic16(void *pv)
504
+{
505
+#ifdef CONFIG_ATOMIC128
506
+ __uint128_t *p = __builtin_assume_aligned(pv, 16);
507
+ Int128Alias r;
508
+
509
+ r.u = qatomic_read__nocheck(p);
510
+ return r.s;
511
+#else
512
+ qemu_build_not_reached();
513
+#endif
514
+}
515
+
516
+/**
517
+ * load_atomic8_or_exit:
518
+ * @env: cpu context
519
+ * @ra: host unwind address
520
+ * @pv: host address
521
+ *
522
+ * Atomically load 8 aligned bytes from @pv.
523
+ * If this is not possible, longjmp out to restart serially.
524
+ */
525
+static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
526
+{
527
+ if (HAVE_al8) {
528
+ return load_atomic8(pv);
529
+ }
530
+
531
+#ifdef CONFIG_USER_ONLY
532
+ /*
533
+ * If the page is not writable, then assume the value is immutable
534
+ * and requires no locking. This ignores the case of MAP_SHARED with
535
+ * another process, because the fallback start_exclusive solution
536
+ * provides no protection across processes.
537
+ */
538
+ if (!page_check_range(h2g(pv), 8, PAGE_WRITE)) {
539
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
540
+ return *p;
541
+ }
542
+#endif
543
+
544
+ /* Ultimate fallback: re-execute in serial context. */
545
+ cpu_loop_exit_atomic(env_cpu(env), ra);
546
+}
547
+
548
+/**
549
+ * load_atomic16_or_exit:
550
+ * @env: cpu context
551
+ * @ra: host unwind address
552
+ * @pv: host address
553
+ *
554
+ * Atomically load 16 aligned bytes from @pv.
555
+ * If this is not possible, longjmp out to restart serially.
556
+ */
557
+static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
558
+{
559
+ Int128 *p = __builtin_assume_aligned(pv, 16);
560
+
561
+ if (HAVE_al16_fast) {
562
+ return load_atomic16(p);
563
+ }
564
+
565
+#ifdef CONFIG_USER_ONLY
566
+ /*
567
+ * We can only use cmpxchg to emulate a load if the page is writable.
568
+ * If the page is not writable, then assume the value is immutable
569
+ * and requires no locking. This ignores the case of MAP_SHARED with
570
+ * another process, because the fallback start_exclusive solution
571
+ * provides no protection across processes.
572
+ */
573
+ if (!page_check_range(h2g(p), 16, PAGE_WRITE)) {
574
+ return *p;
575
+ }
576
+#endif
577
+
578
+ /*
579
+ * In system mode all guest pages are writable, and for user-only
580
+ * we have just checked writability. Try cmpxchg.
581
+ */
582
+#if defined(CONFIG_CMPXCHG128)
583
+ /* Swap 0 with 0, with the side-effect of returning the old value. */
584
+ {
585
+ Int128Alias r;
586
+ r.u = __sync_val_compare_and_swap_16((__uint128_t *)p, 0, 0);
587
+ return r.s;
588
+ }
589
+#endif
590
+
591
+ /* Ultimate fallback: re-execute in serial context. */
592
+ cpu_loop_exit_atomic(env_cpu(env), ra);
593
+}
594
+
595
+/**
596
+ * load_atom_extract_al4x2:
597
+ * @pv: host address
598
+ *
599
+ * Load 4 bytes from @p, from two sequential atomic 4-byte loads.
600
+ */
601
+static uint32_t load_atom_extract_al4x2(void *pv)
602
+{
603
+ uintptr_t pi = (uintptr_t)pv;
604
+ int sh = (pi & 3) * 8;
605
+ uint32_t a, b;
606
+
607
+ pv = (void *)(pi & ~3);
608
+ a = load_atomic4(pv);
609
+ b = load_atomic4(pv + 4);
610
+
611
+ if (HOST_BIG_ENDIAN) {
612
+ return (a << sh) | (b >> (-sh & 31));
613
+ } else {
614
+ return (a >> sh) | (b << (-sh & 31));
615
+ }
616
+}
617
+
618
+/**
619
+ * load_atom_extract_al8x2:
620
+ * @pv: host address
621
+ *
622
+ * Load 8 bytes from @p, from two sequential atomic 8-byte loads.
623
+ */
624
+static uint64_t load_atom_extract_al8x2(void *pv)
625
+{
626
+ uintptr_t pi = (uintptr_t)pv;
627
+ int sh = (pi & 7) * 8;
628
+ uint64_t a, b;
629
+
630
+ pv = (void *)(pi & ~7);
631
+ a = load_atomic8(pv);
632
+ b = load_atomic8(pv + 8);
633
+
634
+ if (HOST_BIG_ENDIAN) {
635
+ return (a << sh) | (b >> (-sh & 63));
636
+ } else {
637
+ return (a >> sh) | (b << (-sh & 63));
638
+ }
639
+}
640
+
641
+/**
642
+ * load_atom_extract_al8_or_exit:
643
+ * @env: cpu context
644
+ * @ra: host unwind address
645
+ * @pv: host address
646
+ * @s: object size in bytes, @s <= 4.
647
+ *
648
+ * Atomically load @s bytes from @p, when p % s != 0, and [p, p+s-1] does
649
+ * not cross an 8-byte boundary. This means that we can perform an atomic
650
+ * 8-byte load and extract.
651
+ * The value is returned in the low bits of a uint32_t.
652
+ */
653
+static uint32_t load_atom_extract_al8_or_exit(CPUArchState *env, uintptr_t ra,
654
+ void *pv, int s)
655
+{
656
+ uintptr_t pi = (uintptr_t)pv;
657
+ int o = pi & 7;
658
+ int shr = (HOST_BIG_ENDIAN ? 8 - s - o : o) * 8;
659
+
660
+ pv = (void *)(pi & ~7);
661
+ return load_atomic8_or_exit(env, ra, pv) >> shr;
662
+}
663
+
664
+/**
665
+ * load_atom_extract_al16_or_exit:
666
+ * @env: cpu context
667
+ * @ra: host unwind address
668
+ * @p: host address
669
+ * @s: object size in bytes, @s <= 8.
670
+ *
671
+ * Atomically load @s bytes from @p, when p % 16 < 8
672
+ * and p % 16 + s > 8. I.e. does not cross a 16-byte
673
+ * boundary, but *does* cross an 8-byte boundary.
674
+ * This is the slow version, so we must have eliminated
675
+ * any faster load_atom_extract_al8_or_exit case.
676
+ *
677
+ * If this is not possible, longjmp out to restart serially.
678
+ */
679
+static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
680
+ void *pv, int s)
681
+{
682
+ uintptr_t pi = (uintptr_t)pv;
683
+ int o = pi & 7;
684
+ int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
685
+ Int128 r;
686
+
687
+ /*
688
+ * Note constraints above: p & 8 must be clear.
689
+ * Provoke SIGBUS if possible otherwise.
690
+ */
691
+ pv = (void *)(pi & ~7);
692
+ r = load_atomic16_or_exit(env, ra, pv);
693
+
694
+ r = int128_urshift(r, shr);
695
+ return int128_getlo(r);
696
+}
697
+
698
+/**
699
+ * load_atom_extract_al16_or_al8:
700
+ * @p: host address
701
+ * @s: object size in bytes, @s <= 8.
702
+ *
703
+ * Load @s bytes from @p, when p % s != 0. If [p, p+s-1] does not
704
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
705
+ * otherwise the access must be 8-byte atomic.
706
+ */
707
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
708
+{
709
+#if defined(CONFIG_ATOMIC128)
710
+ uintptr_t pi = (uintptr_t)pv;
711
+ int o = pi & 7;
712
+ int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
713
+ __uint128_t r;
714
+
715
+ pv = (void *)(pi & ~7);
716
+ if (pi & 8) {
717
+ uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
718
+ uint64_t a = qatomic_read__nocheck(p8);
719
+ uint64_t b = qatomic_read__nocheck(p8 + 1);
720
+
721
+ if (HOST_BIG_ENDIAN) {
722
+ r = ((__uint128_t)a << 64) | b;
723
+ } else {
724
+ r = ((__uint128_t)b << 64) | a;
725
+ }
726
+ } else {
727
+ __uint128_t *p16 = __builtin_assume_aligned(pv, 16, 0);
728
+ r = qatomic_read__nocheck(p16);
729
+ }
730
+ return r >> shr;
731
+#else
732
+ qemu_build_not_reached();
733
+#endif
734
+}
735
+
736
+/**
737
+ * load_atom_4_by_2:
738
+ * @pv: host address
739
+ *
740
+ * Load 4 bytes from @pv, with two 2-byte atomic loads.
741
+ */
742
+static inline uint32_t load_atom_4_by_2(void *pv)
743
+{
744
+ uint32_t a = load_atomic2(pv);
745
+ uint32_t b = load_atomic2(pv + 2);
746
+
747
+ if (HOST_BIG_ENDIAN) {
748
+ return (a << 16) | b;
749
+ } else {
750
+ return (b << 16) | a;
751
+ }
752
+}
753
+
754
+/**
755
+ * load_atom_8_by_2:
756
+ * @pv: host address
757
+ *
758
+ * Load 8 bytes from @pv, with four 2-byte atomic loads.
759
+ */
760
+static inline uint64_t load_atom_8_by_2(void *pv)
761
+{
762
+ uint32_t a = load_atom_4_by_2(pv);
763
+ uint32_t b = load_atom_4_by_2(pv + 4);
764
+
765
+ if (HOST_BIG_ENDIAN) {
766
+ return ((uint64_t)a << 32) | b;
767
+ } else {
768
+ return ((uint64_t)b << 32) | a;
769
+ }
770
+}
771
+
772
+/**
773
+ * load_atom_8_by_4:
774
+ * @pv: host address
775
+ *
776
+ * Load 8 bytes from @pv, with two 4-byte atomic loads.
777
+ */
778
+static inline uint64_t load_atom_8_by_4(void *pv)
779
+{
780
+ uint32_t a = load_atomic4(pv);
781
+ uint32_t b = load_atomic4(pv + 4);
782
+
783
+ if (HOST_BIG_ENDIAN) {
784
+ return ((uint64_t)a << 32) | b;
785
+ } else {
786
+ return ((uint64_t)b << 32) | a;
787
+ }
788
+}
789
+
790
+/**
791
+ * load_atom_2:
792
+ * @p: host address
793
+ * @memop: the full memory op
794
+ *
795
+ * Load 2 bytes from @p, honoring the atomicity of @memop.
796
+ */
797
+static uint16_t load_atom_2(CPUArchState *env, uintptr_t ra,
798
+ void *pv, MemOp memop)
799
+{
800
+ uintptr_t pi = (uintptr_t)pv;
801
+ int atmax;
802
+
803
+ if (likely((pi & 1) == 0)) {
804
+ return load_atomic2(pv);
805
+ }
806
+ if (HAVE_al16_fast) {
807
+ return load_atom_extract_al16_or_al8(pv, 2);
808
+ }
809
+
810
+ atmax = required_atomicity(env, pi, memop);
811
+ switch (atmax) {
812
+ case MO_8:
813
+ return lduw_he_p(pv);
814
+ case MO_16:
815
+ /* The only case remaining is MO_ATOM_WITHIN16. */
816
+ if (!HAVE_al8_fast && (pi & 3) == 1) {
817
+ /* Big or little endian, we want the middle two bytes. */
818
+ return load_atomic4(pv - 1) >> 8;
819
+ }
820
+ if ((pi & 15) != 7) {
821
+ return load_atom_extract_al8_or_exit(env, ra, pv, 2);
822
+ }
823
+ return load_atom_extract_al16_or_exit(env, ra, pv, 2);
824
+ default:
825
+ g_assert_not_reached();
826
+ }
827
+}
828
+
829
+/**
830
+ * load_atom_4:
831
+ * @p: host address
832
+ * @memop: the full memory op
833
+ *
834
+ * Load 4 bytes from @p, honoring the atomicity of @memop.
835
+ */
836
+static uint32_t load_atom_4(CPUArchState *env, uintptr_t ra,
837
+ void *pv, MemOp memop)
838
+{
839
+ uintptr_t pi = (uintptr_t)pv;
840
+ int atmax;
841
+
842
+ if (likely((pi & 3) == 0)) {
843
+ return load_atomic4(pv);
844
+ }
845
+ if (HAVE_al16_fast) {
846
+ return load_atom_extract_al16_or_al8(pv, 4);
847
+ }
848
+
849
+ atmax = required_atomicity(env, pi, memop);
850
+ switch (atmax) {
851
+ case MO_8:
852
+ case MO_16:
853
+ case -MO_16:
854
+ /*
855
+ * For MO_ATOM_IFALIGN, this is more atomicity than required,
856
+ * but it's trivially supported on all hosts, better than 4
857
+ * individual byte loads (when the host requires alignment),
858
+ * and overlaps with the MO_ATOM_SUBALIGN case of p % 2 == 0.
859
+ */
860
+ return load_atom_extract_al4x2(pv);
861
+ case MO_32:
862
+ if (!(pi & 4)) {
863
+ return load_atom_extract_al8_or_exit(env, ra, pv, 4);
864
+ }
865
+ return load_atom_extract_al16_or_exit(env, ra, pv, 4);
866
+ default:
867
+ g_assert_not_reached();
868
+ }
869
+}
870
+
871
+/**
872
+ * load_atom_8:
873
+ * @p: host address
874
+ * @memop: the full memory op
875
+ *
876
+ * Load 8 bytes from @p, honoring the atomicity of @memop.
877
+ */
878
+static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
879
+ void *pv, MemOp memop)
880
+{
881
+ uintptr_t pi = (uintptr_t)pv;
882
+ int atmax;
883
+
884
+ /*
885
+ * If the host does not support 8-byte atomics, wait until we have
886
+ * examined the atomicity parameters below.
887
+ */
888
+ if (HAVE_al8 && likely((pi & 7) == 0)) {
889
+ return load_atomic8(pv);
890
+ }
891
+ if (HAVE_al16_fast) {
892
+ return load_atom_extract_al16_or_al8(pv, 8);
893
+ }
894
+
895
+ atmax = required_atomicity(env, pi, memop);
896
+ if (atmax == MO_64) {
897
+ if (!HAVE_al8 && (pi & 7) == 0) {
898
+ load_atomic8_or_exit(env, ra, pv);
899
+ }
900
+ return load_atom_extract_al16_or_exit(env, ra, pv, 8);
901
+ }
902
+ if (HAVE_al8_fast) {
903
+ return load_atom_extract_al8x2(pv);
904
+ }
905
+ switch (atmax) {
906
+ case MO_8:
907
+ return ldq_he_p(pv);
908
+ case MO_16:
909
+ return load_atom_8_by_2(pv);
910
+ case MO_32:
911
+ return load_atom_8_by_4(pv);
912
+ case -MO_32:
913
+ if (HAVE_al8) {
914
+ return load_atom_extract_al8x2(pv);
915
+ }
916
+ cpu_loop_exit_atomic(env_cpu(env), ra);
917
+ default:
918
+ g_assert_not_reached();
919
+ }
920
+}
921
--
922
2.34.1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 108 ++++----
 accel/tcg/user-exec.c | 12 +-
 accel/tcg/ldst_atomicity.c.inc | 491 +++++++++++++++++++++++++++++++++
 3 files changed, 545 insertions(+), 66 deletions(-)

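The store side needs one extra primitive: a read-modify-write that replaces
only some bytes of an aligned word, so that a misaligned store can still be
single-copy atomic within that word. A simplified standalone sketch of the
masked compare-and-swap used by the store_atom_insert_al* helpers below
(assumes the host supports 4-byte atomics):

    #include <stdint.h>
    #include <stdbool.h>

    /* Atomically replace the bits selected by @msk in the aligned 4-byte
     * word at @p with the corresponding bits of @val. */
    static void store_insert_al4(uint32_t *p, uint32_t val, uint32_t msk)
    {
        uint32_t old = __atomic_load_n(p, __ATOMIC_RELAXED);
        uint32_t new_val;

        do {
            new_val = (old & ~msk) | (val & msk);
        } while (!__atomic_compare_exchange_n(p, &old, new_val, true,
                                              __ATOMIC_RELAXED,
                                              __ATOMIC_RELAXED));
    }
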
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/accel/tcg/cputlb.c
12
+++ b/accel/tcg/cputlb.c
13
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
14
* Store Helpers
15
*/
16
17
-static inline void QEMU_ALWAYS_INLINE
18
-store_memop(void *haddr, uint64_t val, MemOp op)
19
-{
20
- switch (op) {
21
- case MO_UB:
22
- stb_p(haddr, val);
23
- break;
24
- case MO_BEUW:
25
- stw_be_p(haddr, val);
26
- break;
27
- case MO_LEUW:
28
- stw_le_p(haddr, val);
29
- break;
30
- case MO_BEUL:
31
- stl_be_p(haddr, val);
32
- break;
33
- case MO_LEUL:
34
- stl_le_p(haddr, val);
35
- break;
36
- case MO_BEUQ:
37
- stq_be_p(haddr, val);
38
- break;
39
- case MO_LEUQ:
40
- stq_le_p(haddr, val);
41
- break;
42
- default:
43
- qemu_build_not_reached();
44
- }
45
-}
46
-
47
/**
48
* do_st_mmio_leN:
49
* @env: cpu context
50
@@ -XXX,XX +XXX,XX @@ static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
51
return val_le;
52
}
53
54
-/**
55
- * do_st_bytes_leN:
56
- * @p: translation parameters
57
- * @val_le: data to store
58
- *
59
- * Store @p->size bytes at @p->haddr, which is RAM.
60
- * The bytes to store are extracted in little-endian order from @val_le;
61
- * return the bytes of @val_le beyond @p->size that have not been stored.
62
- */
63
-static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le)
64
-{
65
- uint8_t *haddr = p->haddr;
66
- int i, size = p->size;
67
-
68
- for (i = 0; i < size; i++, val_le >>= 8) {
69
- haddr[i] = val_le;
70
- }
71
- return val_le;
72
-}
73
-
74
/*
75
* Wrapper for the above.
76
*/
77
static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
78
- uint64_t val_le, int mmu_idx, uintptr_t ra)
79
+ uint64_t val_le, int mmu_idx,
80
+ MemOp mop, uintptr_t ra)
81
{
82
+ MemOp atom;
83
+ unsigned tmp, half_size;
84
+
85
if (unlikely(p->flags & TLB_MMIO)) {
86
return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
87
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
88
return val_le >> (p->size * 8);
89
- } else {
90
- return do_st_bytes_leN(p, val_le);
91
+ }
92
+
93
+ /*
94
+ * It is a given that we cross a page and therefore there is no atomicity
95
+ * for the store as a whole, but subobjects may need attention.
96
+ */
97
+ atom = mop & MO_ATOM_MASK;
98
+ switch (atom) {
99
+ case MO_ATOM_SUBALIGN:
100
+ return store_parts_leN(p->haddr, p->size, val_le);
101
+
102
+ case MO_ATOM_IFALIGN_PAIR:
103
+ case MO_ATOM_WITHIN16_PAIR:
104
+ tmp = mop & MO_SIZE;
105
+ tmp = tmp ? tmp - 1 : 0;
106
+ half_size = 1 << tmp;
107
+ if (atom == MO_ATOM_IFALIGN_PAIR
108
+ ? p->size == half_size
109
+ : p->size >= half_size) {
110
+ if (!HAVE_al8_fast && p->size <= 4) {
111
+ return store_whole_le4(p->haddr, p->size, val_le);
112
+ } else if (HAVE_al8) {
113
+ return store_whole_le8(p->haddr, p->size, val_le);
114
+ } else {
115
+ cpu_loop_exit_atomic(env_cpu(env), ra);
116
+ }
117
+ }
118
+ /* fall through */
119
+
120
+ case MO_ATOM_IFALIGN:
121
+ case MO_ATOM_WITHIN16:
122
+ case MO_ATOM_NONE:
123
+ return store_bytes_leN(p->haddr, p->size, val_le);
124
+
125
+ default:
126
+ g_assert_not_reached();
127
}
128
}
129
130
@@ -XXX,XX +XXX,XX @@ static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
131
if (memop & MO_BSWAP) {
132
val = bswap16(val);
133
}
134
- store_memop(p->haddr, val, MO_UW);
135
+ store_atom_2(env, ra, p->haddr, memop, val);
136
}
137
}
138
139
@@ -XXX,XX +XXX,XX @@ static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
140
if (memop & MO_BSWAP) {
141
val = bswap32(val);
142
}
143
- store_memop(p->haddr, val, MO_UL);
144
+ store_atom_4(env, ra, p->haddr, memop, val);
145
}
146
}
147
148
@@ -XXX,XX +XXX,XX @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
149
if (memop & MO_BSWAP) {
150
val = bswap64(val);
151
}
152
- store_memop(p->haddr, val, MO_UQ);
153
+ store_atom_8(env, ra, p->haddr, memop, val);
154
}
155
}
156
157
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
158
if ((l.memop & MO_BSWAP) != MO_LE) {
159
val = bswap32(val);
160
}
161
- val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
162
- (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
163
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
164
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
165
}
166
167
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
168
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
169
if ((l.memop & MO_BSWAP) != MO_LE) {
170
val = bswap64(val);
171
}
172
- val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
173
- (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
174
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
175
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
176
}
177
178
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
179
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
180
index XXXXXXX..XXXXXXX 100644
181
--- a/accel/tcg/user-exec.c
182
+++ b/accel/tcg/user-exec.c
183
@@ -XXX,XX +XXX,XX @@ void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
184
185
validate_memop(oi, MO_BEUW);
186
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
187
- stw_be_p(haddr, val);
188
+ store_atom_2(env, ra, haddr, get_memop(oi), be16_to_cpu(val));
189
clear_helper_retaddr();
190
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
191
}
192
@@ -XXX,XX +XXX,XX @@ void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
193
194
validate_memop(oi, MO_BEUL);
195
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
196
- stl_be_p(haddr, val);
197
+ store_atom_4(env, ra, haddr, get_memop(oi), be32_to_cpu(val));
198
clear_helper_retaddr();
199
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
200
}
201
@@ -XXX,XX +XXX,XX @@ void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
202
203
validate_memop(oi, MO_BEUQ);
204
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
205
- stq_be_p(haddr, val);
206
+ store_atom_8(env, ra, haddr, get_memop(oi), be64_to_cpu(val));
207
clear_helper_retaddr();
208
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
209
}
210
@@ -XXX,XX +XXX,XX @@ void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
211
212
validate_memop(oi, MO_LEUW);
213
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
214
- stw_le_p(haddr, val);
215
+ store_atom_2(env, ra, haddr, get_memop(oi), le16_to_cpu(val));
216
clear_helper_retaddr();
217
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
218
}
219
@@ -XXX,XX +XXX,XX @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
220
221
validate_memop(oi, MO_LEUL);
222
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
223
- stl_le_p(haddr, val);
224
+ store_atom_4(env, ra, haddr, get_memop(oi), le32_to_cpu(val));
225
clear_helper_retaddr();
226
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
227
}
228
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
229
230
validate_memop(oi, MO_LEUQ);
231
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
232
- stq_le_p(haddr, val);
233
+ store_atom_8(env, ra, haddr, get_memop(oi), le64_to_cpu(val));
234
clear_helper_retaddr();
235
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
236
}
237
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
238
index XXXXXXX..XXXXXXX 100644
239
--- a/accel/tcg/ldst_atomicity.c.inc
240
+++ b/accel/tcg/ldst_atomicity.c.inc
241
@@ -XXX,XX +XXX,XX @@
242
#else
243
# define HAVE_al16_fast false
244
#endif
245
+#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
246
+# define HAVE_al16 true
247
+#else
248
+# define HAVE_al16 false
249
+#endif
250
+
251
252
/**
253
* required_atomicity:
254
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
255
g_assert_not_reached();
256
}
257
}
258
+
259
+/**
260
+ * store_atomic2:
261
+ * @pv: host address
262
+ * @val: value to store
263
+ *
264
+ * Atomically store 2 aligned bytes to @pv.
265
+ */
266
+static inline void store_atomic2(void *pv, uint16_t val)
267
+{
268
+ uint16_t *p = __builtin_assume_aligned(pv, 2);
269
+ qatomic_set(p, val);
270
+}
271
+
272
+/**
273
+ * store_atomic4:
274
+ * @pv: host address
275
+ * @val: value to store
276
+ *
277
+ * Atomically store 4 aligned bytes to @pv.
278
+ */
279
+static inline void store_atomic4(void *pv, uint32_t val)
280
+{
281
+ uint32_t *p = __builtin_assume_aligned(pv, 4);
282
+ qatomic_set(p, val);
283
+}
284
+
285
+/**
286
+ * store_atomic8:
287
+ * @pv: host address
288
+ * @val: value to store
289
+ *
290
+ * Atomically store 8 aligned bytes to @pv.
291
+ */
292
+static inline void store_atomic8(void *pv, uint64_t val)
293
+{
294
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
295
+
296
+ qemu_build_assert(HAVE_al8);
297
+ qatomic_set__nocheck(p, val);
298
+}
299
+
300
+/**
301
+ * store_atom_4x2
302
+ */
303
+static inline void store_atom_4_by_2(void *pv, uint32_t val)
304
+{
305
+ store_atomic2(pv, val >> (HOST_BIG_ENDIAN ? 16 : 0));
306
+ store_atomic2(pv + 2, val >> (HOST_BIG_ENDIAN ? 0 : 16));
307
+}
308
+
309
+/**
310
+ * store_atom_8_by_2
311
+ */
312
+static inline void store_atom_8_by_2(void *pv, uint64_t val)
313
+{
314
+ store_atom_4_by_2(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
315
+ store_atom_4_by_2(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
316
+}
317
+
318
+/**
319
+ * store_atom_8_by_4
320
+ */
321
+static inline void store_atom_8_by_4(void *pv, uint64_t val)
322
+{
323
+ store_atomic4(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
324
+ store_atomic4(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
325
+}
326
+
327
+/**
328
+ * store_atom_insert_al4:
329
+ * @p: host address
330
+ * @val: shifted value to store
331
+ * @msk: mask for value to store
332
+ *
333
+ * Atomically store @val to @p, masked by @msk.
334
+ */
335
+static void store_atom_insert_al4(uint32_t *p, uint32_t val, uint32_t msk)
336
+{
337
+ uint32_t old, new;
338
+
339
+ p = __builtin_assume_aligned(p, 4);
340
+ old = qatomic_read(p);
341
+ do {
342
+ new = (old & ~msk) | val;
343
+ } while (!__atomic_compare_exchange_n(p, &old, new, true,
344
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
345
+}
346
+
347
+/**
348
+ * store_atom_insert_al8:
349
+ * @p: host address
350
+ * @val: shifted value to store
351
+ * @msk: mask for value to store
352
+ *
353
+ * Atomically store @val to @p masked by @msk.
354
+ */
355
+static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
356
+{
357
+ uint64_t old, new;
358
+
359
+ qemu_build_assert(HAVE_al8);
360
+ p = __builtin_assume_aligned(p, 8);
361
+ old = qatomic_read__nocheck(p);
362
+ do {
363
+ new = (old & ~msk) | val;
364
+ } while (!__atomic_compare_exchange_n(p, &old, new, true,
365
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
366
+}
367
+
368
+/**
369
+ * store_atom_insert_al16:
370
+ * @p: host address
371
+ * @val: shifted value to store
372
+ * @msk: mask for value to store
373
+ *
374
+ * Atomically store @val to @p masked by @msk.
375
+ */
376
+static void store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
377
+{
378
+#if defined(CONFIG_ATOMIC128)
379
+ __uint128_t *pu, old, new;
380
+
381
+ /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
382
+ pu = __builtin_assume_aligned(ps, 16);
383
+ old = *pu;
384
+ do {
385
+ new = (old & ~msk.u) | val.u;
386
+ } while (!__atomic_compare_exchange_n(pu, &old, new, true,
387
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
388
+#elif defined(CONFIG_CMPXCHG128)
389
+ __uint128_t *pu, old, new;
390
+
391
+ /*
392
+ * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
393
+ * defer to libatomic, so we must use __sync_*_compare_and_swap_16
394
+ * and accept the sequential consistency that comes with it.
395
+ */
396
+ pu = __builtin_assume_aligned(ps, 16);
397
+ do {
398
+ old = *pu;
399
+ new = (old & ~msk.u) | val.u;
400
+ } while (!__sync_bool_compare_and_swap_16(pu, old, new));
401
+#else
402
+ qemu_build_not_reached();
403
+#endif
404
+}
405
+
406
+/**
407
+ * store_bytes_leN:
408
+ * @pv: host address
409
+ * @size: number of bytes to store
410
+ * @val_le: data to store
411
+ *
412
+ * Store @size bytes at @p. The bytes to store are extracted in little-endian order
413
+ * from @val_le; return the bytes of @val_le beyond @size that have not been stored.
414
+ */
415
+static uint64_t store_bytes_leN(void *pv, int size, uint64_t val_le)
416
+{
417
+ uint8_t *p = pv;
418
+ for (int i = 0; i < size; i++, val_le >>= 8) {
419
+ p[i] = val_le;
420
+ }
421
+ return val_le;
422
+}
423
+
424
+/**
425
+ * store_parts_leN
426
+ * @pv: host address
427
+ * @size: number of bytes to store
428
+ * @val_le: data to store
429
+ *
430
+ * As store_bytes_leN, but atomically on each aligned part.
431
+ */
432
+G_GNUC_UNUSED
433
+static uint64_t store_parts_leN(void *pv, int size, uint64_t val_le)
434
+{
435
+ do {
436
+ int n;
437
+
438
+ /* Find minimum of alignment and size */
439
+ switch (((uintptr_t)pv | size) & 7) {
440
+ case 4:
441
+ store_atomic4(pv, le32_to_cpu(val_le));
442
+ val_le >>= 32;
443
+ n = 4;
444
+ break;
445
+ case 2:
446
+ case 6:
447
+ store_atomic2(pv, le16_to_cpu(val_le));
448
+ val_le >>= 16;
449
+ n = 2;
450
+ break;
451
+ default:
452
+ *(uint8_t *)pv = val_le;
453
+ val_le >>= 8;
454
+ n = 1;
455
+ break;
456
+ case 0:
457
+ g_assert_not_reached();
458
+ }
459
+ pv += n;
460
+ size -= n;
461
+ } while (size != 0);
462
+
463
+ return val_le;
464
+}
465
+
466
+/**
467
+ * store_whole_le4
468
+ * @pv: host address
469
+ * @size: number of bytes to store
470
+ * @val_le: data to store
471
+ *
472
+ * As store_bytes_leN, but atomically as a whole.
473
+ * Four aligned bytes are guaranteed to cover the store.
474
+ */
475
+static uint64_t store_whole_le4(void *pv, int size, uint64_t val_le)
476
+{
477
+ int sz = size * 8;
478
+ int o = (uintptr_t)pv & 3;
479
+ int sh = o * 8;
480
+ uint32_t m = MAKE_64BIT_MASK(0, sz);
481
+ uint32_t v;
482
+
483
+ if (HOST_BIG_ENDIAN) {
484
+ v = bswap32(val_le) >> sh;
485
+ m = bswap32(m) >> sh;
486
+ } else {
487
+ v = val_le << sh;
488
+ m <<= sh;
489
+ }
490
+ store_atom_insert_al4(pv - o, v, m);
491
+ return val_le >> sz;
492
+}
493
+
494
+/**
495
+ * store_whole_le8
496
+ * @pv: host address
497
+ * @size: number of bytes to store
498
+ * @val_le: data to store
499
+ *
500
+ * As store_bytes_leN, but atomically as a whole.
501
+ * Eight aligned bytes are guaranteed to cover the store.
502
+ */
503
+static uint64_t store_whole_le8(void *pv, int size, uint64_t val_le)
504
+{
505
+ int sz = size * 8;
506
+ int o = (uintptr_t)pv & 7;
507
+ int sh = o * 8;
508
+ uint64_t m = MAKE_64BIT_MASK(0, sz);
509
+ uint64_t v;
510
+
511
+ qemu_build_assert(HAVE_al8);
512
+ if (HOST_BIG_ENDIAN) {
513
+ v = bswap64(val_le) >> sh;
514
+ m = bswap64(m) >> sh;
515
+ } else {
516
+ v = val_le << sh;
517
+ m <<= sh;
518
+ }
519
+ store_atom_insert_al8(pv - o, v, m);
520
+ return val_le >> sz;
521
+}
522
+
523
+/**
524
+ * store_whole_le16:
525
+ * @pv: host address
526
+ * @size: number of bytes to store
527
+ * @val_le: data to store
528
+ *
529
+ * As store_bytes_leN, but atomically as a whole.
530
+ * 16 aligned bytes are guaranteed to cover the store.
531
+ */
532
+static uint64_t store_whole_le16(void *pv, int size, Int128 val_le)
533
+{
534
+ int sz = size * 8;
535
+ int o = (uintptr_t)pv & 15;
536
+ int sh = o * 8;
537
+ Int128 m, v;
538
+
539
+ qemu_build_assert(HAVE_al16);
540
+
541
+ /* Like MAKE_64BIT_MASK(0, sz), but larger. */
542
+ if (sz <= 64) {
543
+ m = int128_make64(MAKE_64BIT_MASK(0, sz));
544
+ } else {
545
+ m = int128_make128(-1, MAKE_64BIT_MASK(0, sz - 64));
546
+ }
547
+
548
+ if (HOST_BIG_ENDIAN) {
549
+ v = int128_urshift(bswap128(val_le), sh);
550
+ m = int128_urshift(bswap128(m), sh);
551
+ } else {
552
+ v = int128_lshift(val_le, sh);
553
+ m = int128_lshift(m, sh);
554
+ }
555
+ store_atom_insert_al16(pv - o, v, m);
556
+
557
+ /* Unused if sz <= 64. */
558
+ return int128_gethi(val_le) >> (sz - 64);
559
+}
560
+
561
+/**
562
+ * store_atom_2:
563
+ * @pv: host address
564
+ * @val: the value to store
565
+ * @memop: the full memory op
566
+ *
567
+ * Store 2 bytes to @pv, honoring the atomicity of @memop.
568
+ */
569
+static void store_atom_2(CPUArchState *env, uintptr_t ra,
570
+ void *pv, MemOp memop, uint16_t val)
571
+{
572
+ uintptr_t pi = (uintptr_t)pv;
573
+ int atmax;
574
+
575
+ if (likely((pi & 1) == 0)) {
576
+ store_atomic2(pv, val);
577
+ return;
578
+ }
579
+
580
+ atmax = required_atomicity(env, pi, memop);
581
+ if (atmax == MO_8) {
582
+ stw_he_p(pv, val);
583
+ return;
584
+ }
585
+
586
+ /*
587
+ * The only case remaining is MO_ATOM_WITHIN16.
588
+ * Big or little endian, we want the middle two bytes in each test.
589
+ */
590
+ if ((pi & 3) == 1) {
591
+ store_atom_insert_al4(pv - 1, (uint32_t)val << 8, MAKE_64BIT_MASK(8, 16));
592
+ return;
593
+ } else if ((pi & 7) == 3) {
594
+ if (HAVE_al8) {
595
+ store_atom_insert_al8(pv - 3, (uint64_t)val << 24, MAKE_64BIT_MASK(24, 16));
596
+ return;
597
+ }
598
+ } else if ((pi & 15) == 7) {
599
+ if (HAVE_al16) {
600
+ Int128 v = int128_lshift(int128_make64(val), 56);
601
+ Int128 m = int128_lshift(int128_make64(0xffff), 56);
602
+ store_atom_insert_al16(pv - 7, v, m);
603
+ return;
604
+ }
605
+ } else {
606
+ g_assert_not_reached();
607
+ }
608
+
609
+ cpu_loop_exit_atomic(env_cpu(env), ra);
610
+}
611
+
612
+/**
613
+ * store_atom_4:
614
+ * @pv: host address
615
+ * @val: the value to store
616
+ * @memop: the full memory op
617
+ *
618
+ * Store 4 bytes to @pv, honoring the atomicity of @memop.
619
+ */
620
+static void store_atom_4(CPUArchState *env, uintptr_t ra,
621
+ void *pv, MemOp memop, uint32_t val)
622
+{
623
+ uintptr_t pi = (uintptr_t)pv;
624
+ int atmax;
625
+
626
+ if (likely((pi & 3) == 0)) {
627
+ store_atomic4(pv, val);
628
+ return;
629
+ }
630
+
631
+ atmax = required_atomicity(env, pi, memop);
632
+ switch (atmax) {
633
+ case MO_8:
634
+ stl_he_p(pv, val);
635
+ return;
636
+ case MO_16:
637
+ store_atom_4_by_2(pv, val);
638
+ return;
639
+ case -MO_16:
640
+ {
641
+ uint32_t val_le = cpu_to_le32(val);
642
+ int s2 = pi & 3;
643
+ int s1 = 4 - s2;
644
+
645
+ switch (s2) {
646
+ case 1:
647
+ val_le = store_whole_le4(pv, s1, val_le);
648
+ *(uint8_t *)(pv + 3) = val_le;
649
+ break;
650
+ case 3:
651
+ *(uint8_t *)pv = val_le;
652
+ store_whole_le4(pv + 1, s2, val_le >> 8);
653
+ break;
654
+ case 0: /* aligned */
655
+ case 2: /* atmax MO_16 */
656
+ default:
657
+ g_assert_not_reached();
658
+ }
659
+ }
660
+ return;
661
+ case MO_32:
662
+ if ((pi & 7) < 4) {
663
+ if (HAVE_al8) {
664
+ store_whole_le8(pv, 4, cpu_to_le32(val));
665
+ return;
666
+ }
667
+ } else {
668
+ if (HAVE_al16) {
669
+ store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
670
+ return;
671
+ }
672
+ }
673
+ cpu_loop_exit_atomic(env_cpu(env), ra);
674
+ default:
675
+ g_assert_not_reached();
676
+ }
677
+}
678
+
679
+/**
680
+ * store_atom_8:
681
+ * @pv: host address
682
+ * @val: the value to store
683
+ * @memop: the full memory op
684
+ *
685
+ * Store 8 bytes to @pv, honoring the atomicity of @memop.
686
+ */
687
+static void store_atom_8(CPUArchState *env, uintptr_t ra,
688
+ void *pv, MemOp memop, uint64_t val)
689
+{
690
+ uintptr_t pi = (uintptr_t)pv;
691
+ int atmax;
692
+
693
+ if (HAVE_al8 && likely((pi & 7) == 0)) {
694
+ store_atomic8(pv, val);
695
+ return;
696
+ }
697
+
698
+ atmax = required_atomicity(env, pi, memop);
699
+ switch (atmax) {
700
+ case MO_8:
701
+ stq_he_p(pv, val);
702
+ return;
703
+ case MO_16:
704
+ store_atom_8_by_2(pv, val);
705
+ return;
706
+ case MO_32:
707
+ store_atom_8_by_4(pv, val);
708
+ return;
709
+ case -MO_32:
710
+ if (HAVE_al8) {
711
+ uint64_t val_le = cpu_to_le64(val);
712
+ int s2 = pi & 7;
713
+ int s1 = 8 - s2;
714
+
715
+ switch (s2) {
716
+ case 1 ... 3:
717
+ val_le = store_whole_le8(pv, s1, val_le);
718
+ store_bytes_leN(pv + s1, s2, val_le);
719
+ break;
720
+ case 5 ... 7:
721
+ val_le = store_bytes_leN(pv, s1, val_le);
722
+ store_whole_le8(pv + s1, s2, val_le);
723
+ break;
724
+ case 0: /* aligned */
725
+ case 4: /* atmax MO_32 */
726
+ default:
727
+ g_assert_not_reached();
728
+ }
729
+ return;
730
+ }
731
+ break;
732
+ case MO_64:
733
+ if (HAVE_al16) {
734
+ store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
735
+ return;
736
+ }
737
+ break;
738
+ default:
739
+ g_assert_not_reached();
740
+ }
741
+ cpu_loop_exit_atomic(env_cpu(env), ra);
742
+}
743
--
744
2.34.1
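As an aside on the store_whole_le4()/store_atom_insert_al4() arithmetic in the
patch above: the following is a minimal standalone sketch of the same
shift/mask insertion, assuming a little-endian host and the GCC/Clang
__atomic builtins; demo_insert_al4() is illustrative only, not QEMU code.

/*
 * Standalone demo (not QEMU code): insert a 2-byte value at byte
 * offset 1 of an aligned 32-bit word with a compare-and-swap loop,
 * using the same shift/mask scheme as store_whole_le4() above.
 * Assumes a little-endian host.
 */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <inttypes.h>

static void demo_insert_al4(uint32_t *p, uint32_t val, uint32_t msk)
{
    uint32_t old = __atomic_load_n(p, __ATOMIC_RELAXED);
    uint32_t new;

    do {
        new = (old & ~msk) | val;
    } while (!__atomic_compare_exchange_n(p, &old, new, true,
                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}

int main(void)
{
    uint32_t word = 0x44332211;          /* aligned 4-byte granule */
    uint16_t st = 0xbeef;                /* misaligned 2-byte guest store */
    int sh = 1 * 8;                      /* byte offset 1 -> shift by 8 */
    uint32_t m = (uint32_t)0xffff << sh; /* mask covering bytes 1..2 */
    uint32_t v = (uint32_t)st << sh;     /* value shifted into place */

    demo_insert_al4(&word, v, m);
    printf("%08" PRIx32 "\n", word);     /* prints 44beef11 */
    return 0;
}

The masked compare-and-swap leaves the other bytes of the aligned granule
untouched, which is what lets a misaligned 2- or 3-byte store remain
single-copy atomic on the bytes it covers.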
New patch
1
With the current structure of cputlb.c, there is no difference
2
between the little-endian and big-endian entry points, aside
3
from the assert. Unify the pairs of functions.
1
4
5
Hoist the qemu_{ld,st}_helpers arrays to tcg.c.
6
7
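For illustration, a hedged fragment of how a caller sees the unified
interface; example_load_uw() and its parameters are placeholders, while
make_memop_idx(), MO_LEUW/MO_BEUW and helper_lduw_mmu() are the interfaces
touched below:

static tcg_target_ulong example_load_uw(CPUArchState *env, target_ulong addr,
                                        unsigned mmu_idx, uintptr_t retaddr,
                                        bool big_endian)
{
    /* Endianness is encoded in the MemOpIdx, not in the helper name. */
    MemOpIdx oi = make_memop_idx(big_endian ? MO_BEUW : MO_LEUW, mmu_idx);

    /* Result is zero-extended to tcg register size. */
    return helper_lduw_mmu(env, addr, oi, retaddr);
}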
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
docs/devel/loads-stores.rst | 36 ++----
12
include/tcg/tcg-ldst.h | 60 ++++------
13
accel/tcg/cputlb.c | 190 ++++++++++---------------------
14
tcg/tcg.c | 21 ++++
15
tcg/tci.c | 61 ++++------
16
tcg/aarch64/tcg-target.c.inc | 33 ------
17
tcg/arm/tcg-target.c.inc | 37 ------
18
tcg/i386/tcg-target.c.inc | 30 +----
19
tcg/loongarch64/tcg-target.c.inc | 23 ----
20
tcg/mips/tcg-target.c.inc | 31 -----
21
tcg/ppc/tcg-target.c.inc | 30 +----
22
tcg/riscv/tcg-target.c.inc | 42 -------
23
tcg/s390x/tcg-target.c.inc | 31 +----
24
tcg/sparc64/tcg-target.c.inc | 32 +-----
25
14 files changed, 146 insertions(+), 511 deletions(-)
26
27
diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
28
index XXXXXXX..XXXXXXX 100644
29
--- a/docs/devel/loads-stores.rst
30
+++ b/docs/devel/loads-stores.rst
31
@@ -XXX,XX +XXX,XX @@ swap: ``translator_ld{sign}{size}_swap(env, ptr, swap)``
32
Regexes for git grep
33
- ``\<translator_ld[us]\?[bwlq]\(_swap\)\?\>``
34
35
-``helper_*_{ld,st}*_mmu``
36
+``helper_{ld,st}*_mmu``
37
~~~~~~~~~~~~~~~~~~~~~~~~~
38
39
These functions are intended primarily to be called by the code
40
-generated by the TCG backend. They may also be called by target
41
-CPU helper function code. Like the ``cpu_{ld,st}_mmuidx_ra`` functions
42
-they perform accesses by guest virtual address, with a given ``mmuidx``.
43
+generated by the TCG backend. Like the ``cpu_{ld,st}_mmu`` functions
44
+they perform accesses by guest virtual address, with a given ``MemOpIdx``.
45
46
-These functions specify an ``opindex`` parameter which encodes
47
-(among other things) the mmu index to use for the access. This parameter
48
-should be created by calling ``make_memop_idx()``.
49
+They differ from ``cpu_{ld,st}_mmu`` in that they take the endianness
50
+of the operation only from the MemOpIdx, and loads extend the return
51
+value to the size of a host general register (``tcg_target_ulong``).
52
53
-The ``retaddr`` parameter should be the result of GETPC() called directly
54
-from the top level HELPER(foo) function (or 0 if no guest CPU state
55
-unwinding is required).
56
+load: ``helper_ld{sign}{size}_mmu(env, addr, opindex, retaddr)``
57
58
-**TODO** The names of these functions are a bit odd for historical
59
-reasons because they were originally expected to be called only from
60
-within generated code. We should rename them to bring them more in
61
-line with the other memory access functions. The explicit endianness
62
-is the only feature they have beyond ``*_mmuidx_ra``.
63
-
64
-load: ``helper_{endian}_ld{sign}{size}_mmu(env, addr, opindex, retaddr)``
65
-
66
-store: ``helper_{endian}_st{size}_mmu(env, addr, val, opindex, retaddr)``
67
+store: ``helper_st{size}_mmu(env, addr, val, opindex, retaddr)``
68
69
``sign``
70
- (empty) : for 32 or 64 bit sizes
71
@@ -XXX,XX +XXX,XX @@ store: ``helper_{endian}_st{size}_mmu(env, addr, val, opindex, retaddr)``
72
- ``l`` : 32 bits
73
- ``q`` : 64 bits
74
75
-``endian``
76
- - ``le`` : little endian
77
- - ``be`` : big endian
78
- - ``ret`` : target endianness
79
-
80
Regexes for git grep
81
- - ``\<helper_\(le\|be\|ret\)_ld[us]\?[bwlq]_mmu\>``
82
- - ``\<helper_\(le\|be\|ret\)_st[bwlq]_mmu\>``
83
+ - ``\<helper_ld[us]\?[bwlq]_mmu\>``
84
+ - ``\<helper_st[bwlq]_mmu\>``
85
86
``address_space_*``
87
~~~~~~~~~~~~~~~~~~~
88
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
89
index XXXXXXX..XXXXXXX 100644
90
--- a/include/tcg/tcg-ldst.h
91
+++ b/include/tcg/tcg-ldst.h
92
@@ -XXX,XX +XXX,XX @@
93
#ifdef CONFIG_SOFTMMU
94
95
/* Value zero-extended to tcg register size. */
96
-tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
97
- MemOpIdx oi, uintptr_t retaddr);
98
-tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
99
- MemOpIdx oi, uintptr_t retaddr);
100
-tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
101
- MemOpIdx oi, uintptr_t retaddr);
102
-uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
103
- MemOpIdx oi, uintptr_t retaddr);
104
-tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
105
- MemOpIdx oi, uintptr_t retaddr);
106
-tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
107
- MemOpIdx oi, uintptr_t retaddr);
108
-uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
109
- MemOpIdx oi, uintptr_t retaddr);
110
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
111
+ MemOpIdx oi, uintptr_t retaddr);
112
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
113
+ MemOpIdx oi, uintptr_t retaddr);
114
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
115
+ MemOpIdx oi, uintptr_t retaddr);
116
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
117
+ MemOpIdx oi, uintptr_t retaddr);
118
119
/* Value sign-extended to tcg register size. */
120
-tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
121
- MemOpIdx oi, uintptr_t retaddr);
122
-tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
123
- MemOpIdx oi, uintptr_t retaddr);
124
-tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
125
- MemOpIdx oi, uintptr_t retaddr);
126
-tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
127
- MemOpIdx oi, uintptr_t retaddr);
128
-tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
129
- MemOpIdx oi, uintptr_t retaddr);
130
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
131
+ MemOpIdx oi, uintptr_t retaddr);
132
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
133
+ MemOpIdx oi, uintptr_t retaddr);
134
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
135
+ MemOpIdx oi, uintptr_t retaddr);
136
137
/*
138
* Value extended to at least uint32_t, so that some ABIs do not require
139
* zero-extension from uint8_t or uint16_t.
140
*/
141
-void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
142
- MemOpIdx oi, uintptr_t retaddr);
143
-void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
144
- MemOpIdx oi, uintptr_t retaddr);
145
-void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
146
- MemOpIdx oi, uintptr_t retaddr);
147
-void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
148
- MemOpIdx oi, uintptr_t retaddr);
149
-void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
150
- MemOpIdx oi, uintptr_t retaddr);
151
-void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
152
- MemOpIdx oi, uintptr_t retaddr);
153
-void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
154
- MemOpIdx oi, uintptr_t retaddr);
155
+void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
156
+ MemOpIdx oi, uintptr_t retaddr);
157
+void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
158
+ MemOpIdx oi, uintptr_t retaddr);
159
+void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
160
+ MemOpIdx oi, uintptr_t retaddr);
161
+void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
162
+ MemOpIdx oi, uintptr_t retaddr);
163
164
#else
165
166
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
167
index XXXXXXX..XXXXXXX 100644
168
--- a/accel/tcg/cputlb.c
169
+++ b/accel/tcg/cputlb.c
170
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
171
cpu_loop_exit_atomic(env_cpu(env), retaddr);
172
}
173
174
-/*
175
- * Verify that we have passed the correct MemOp to the correct function.
176
- *
177
- * In the case of the helper_*_mmu functions, we will have done this by
178
- * using the MemOp to look up the helper during code generation.
179
- *
180
- * In the case of the cpu_*_mmu functions, this is up to the caller.
181
- * We could present one function to target code, and dispatch based on
182
- * the MemOp, but so far we have worked hard to avoid an indirect function
183
- * call along the memory path.
184
- */
185
-static void validate_memop(MemOpIdx oi, MemOp expected)
186
-{
187
-#ifdef CONFIG_DEBUG_TCG
188
- MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP);
189
- assert(have == expected);
190
-#endif
191
-}
192
-
193
/*
194
* Load Helpers
195
*
196
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
197
return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
198
}
199
200
-tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
201
- MemOpIdx oi, uintptr_t retaddr)
202
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
203
+ MemOpIdx oi, uintptr_t retaddr)
204
{
205
- validate_memop(oi, MO_UB);
206
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
207
return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
208
}
209
210
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
211
return ret;
212
}
213
214
-tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
215
- MemOpIdx oi, uintptr_t retaddr)
216
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
217
+ MemOpIdx oi, uintptr_t retaddr)
218
{
219
- validate_memop(oi, MO_LEUW);
220
- return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
221
-}
222
-
223
-tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
224
- MemOpIdx oi, uintptr_t retaddr)
225
-{
226
- validate_memop(oi, MO_BEUW);
227
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
228
return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
229
}
230
231
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
232
return ret;
233
}
234
235
-tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
236
- MemOpIdx oi, uintptr_t retaddr)
237
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
238
+ MemOpIdx oi, uintptr_t retaddr)
239
{
240
- validate_memop(oi, MO_LEUL);
241
- return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
242
-}
243
-
244
-tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
245
- MemOpIdx oi, uintptr_t retaddr)
246
-{
247
- validate_memop(oi, MO_BEUL);
248
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
249
return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
250
}
251
252
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
253
return ret;
254
}
255
256
-uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
257
- MemOpIdx oi, uintptr_t retaddr)
258
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
259
+ MemOpIdx oi, uintptr_t retaddr)
260
{
261
- validate_memop(oi, MO_LEUQ);
262
- return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
263
-}
264
-
265
-uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
266
- MemOpIdx oi, uintptr_t retaddr)
267
-{
268
- validate_memop(oi, MO_BEUQ);
269
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
270
return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
271
}
272
273
@@ -XXX,XX +XXX,XX @@ uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
274
* avoid this for 64-bit data, or for 32-bit data on 32-bit host.
275
*/
276
277
-
278
-tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
279
- MemOpIdx oi, uintptr_t retaddr)
280
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
281
+ MemOpIdx oi, uintptr_t retaddr)
282
{
283
- return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
284
+ return (int8_t)helper_ldub_mmu(env, addr, oi, retaddr);
285
}
286
287
-tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
288
- MemOpIdx oi, uintptr_t retaddr)
289
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
290
+ MemOpIdx oi, uintptr_t retaddr)
291
{
292
- return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
293
+ return (int16_t)helper_lduw_mmu(env, addr, oi, retaddr);
294
}
295
296
-tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
297
- MemOpIdx oi, uintptr_t retaddr)
298
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
299
+ MemOpIdx oi, uintptr_t retaddr)
300
{
301
- return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
302
-}
303
-
304
-tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
305
- MemOpIdx oi, uintptr_t retaddr)
306
-{
307
- return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
308
-}
309
-
310
-tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
311
- MemOpIdx oi, uintptr_t retaddr)
312
-{
313
- return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
314
+ return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
315
}
316
317
/*
318
@@ -XXX,XX +XXX,XX @@ uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
319
{
320
uint8_t ret;
321
322
- validate_memop(oi, MO_UB);
323
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_UB);
324
ret = do_ld1_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
325
plugin_load_cb(env, addr, oi);
326
return ret;
327
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
328
{
329
uint16_t ret;
330
331
- validate_memop(oi, MO_BEUW);
332
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW);
333
ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
334
plugin_load_cb(env, addr, oi);
335
return ret;
336
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
337
{
338
uint32_t ret;
339
340
- validate_memop(oi, MO_BEUL);
341
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL);
342
ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
343
plugin_load_cb(env, addr, oi);
344
return ret;
345
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
346
{
347
uint64_t ret;
348
349
- validate_memop(oi, MO_BEUQ);
350
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ);
351
ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
352
plugin_load_cb(env, addr, oi);
353
return ret;
354
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
355
{
356
uint16_t ret;
357
358
- validate_memop(oi, MO_LEUW);
359
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
360
ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
361
plugin_load_cb(env, addr, oi);
362
return ret;
363
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
364
{
365
uint32_t ret;
366
367
- validate_memop(oi, MO_LEUL);
368
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
369
ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
370
plugin_load_cb(env, addr, oi);
371
return ret;
372
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
373
{
374
uint64_t ret;
375
376
- validate_memop(oi, MO_LEUQ);
377
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
378
ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
379
plugin_load_cb(env, addr, oi);
380
return ret;
381
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
382
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
383
new_oi = make_memop_idx(mop, mmu_idx);
384
385
- h = helper_be_ldq_mmu(env, addr, new_oi, ra);
386
- l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra);
387
+ h = helper_ldq_mmu(env, addr, new_oi, ra);
388
+ l = helper_ldq_mmu(env, addr + 8, new_oi, ra);
389
390
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
391
return int128_make128(l, h);
392
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
393
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
394
new_oi = make_memop_idx(mop, mmu_idx);
395
396
- l = helper_le_ldq_mmu(env, addr, new_oi, ra);
397
- h = helper_le_ldq_mmu(env, addr + 8, new_oi, ra);
398
+ l = helper_ldq_mmu(env, addr, new_oi, ra);
399
+ h = helper_ldq_mmu(env, addr + 8, new_oi, ra);
400
401
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
402
return int128_make128(l, h);
403
@@ -XXX,XX +XXX,XX @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
404
}
405
}
406
407
-void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
408
- MemOpIdx oi, uintptr_t ra)
409
+void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
410
+ MemOpIdx oi, uintptr_t ra)
411
{
412
MMULookupLocals l;
413
bool crosspage;
414
415
- validate_memop(oi, MO_UB);
416
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
417
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
418
tcg_debug_assert(!crosspage);
419
420
@@ -XXX,XX +XXX,XX @@ static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
421
do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
422
}
423
424
-void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
425
- MemOpIdx oi, uintptr_t retaddr)
426
+void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
427
+ MemOpIdx oi, uintptr_t retaddr)
428
{
429
- validate_memop(oi, MO_LEUW);
430
- do_st2_mmu(env, addr, val, oi, retaddr);
431
-}
432
-
433
-void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
434
- MemOpIdx oi, uintptr_t retaddr)
435
-{
436
- validate_memop(oi, MO_BEUW);
437
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
438
do_st2_mmu(env, addr, val, oi, retaddr);
439
}
440
441
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
442
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
443
}
444
445
-void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
446
- MemOpIdx oi, uintptr_t retaddr)
447
+void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
448
+ MemOpIdx oi, uintptr_t retaddr)
449
{
450
- validate_memop(oi, MO_LEUL);
451
- do_st4_mmu(env, addr, val, oi, retaddr);
452
-}
453
-
454
-void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
455
- MemOpIdx oi, uintptr_t retaddr)
456
-{
457
- validate_memop(oi, MO_BEUL);
458
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
459
do_st4_mmu(env, addr, val, oi, retaddr);
460
}
461
462
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
463
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
464
}
465
466
-void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
467
- MemOpIdx oi, uintptr_t retaddr)
468
+void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
469
+ MemOpIdx oi, uintptr_t retaddr)
470
{
471
- validate_memop(oi, MO_LEUQ);
472
- do_st8_mmu(env, addr, val, oi, retaddr);
473
-}
474
-
475
-void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
476
- MemOpIdx oi, uintptr_t retaddr)
477
-{
478
- validate_memop(oi, MO_BEUQ);
479
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
480
do_st8_mmu(env, addr, val, oi, retaddr);
481
}
482
483
@@ -XXX,XX +XXX,XX @@ static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
484
void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
485
MemOpIdx oi, uintptr_t retaddr)
486
{
487
- helper_ret_stb_mmu(env, addr, val, oi, retaddr);
488
+ helper_stb_mmu(env, addr, val, oi, retaddr);
489
plugin_store_cb(env, addr, oi);
490
}
491
492
void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
493
MemOpIdx oi, uintptr_t retaddr)
494
{
495
- helper_be_stw_mmu(env, addr, val, oi, retaddr);
496
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW);
497
+ do_st2_mmu(env, addr, val, oi, retaddr);
498
plugin_store_cb(env, addr, oi);
499
}
500
501
void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
502
MemOpIdx oi, uintptr_t retaddr)
503
{
504
- helper_be_stl_mmu(env, addr, val, oi, retaddr);
505
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL);
506
+ do_st4_mmu(env, addr, val, oi, retaddr);
507
plugin_store_cb(env, addr, oi);
508
}
509
510
void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
511
MemOpIdx oi, uintptr_t retaddr)
512
{
513
- helper_be_stq_mmu(env, addr, val, oi, retaddr);
514
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ);
515
+ do_st8_mmu(env, addr, val, oi, retaddr);
516
plugin_store_cb(env, addr, oi);
517
}
518
519
void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
520
MemOpIdx oi, uintptr_t retaddr)
521
{
522
- helper_le_stw_mmu(env, addr, val, oi, retaddr);
523
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
524
+ do_st2_mmu(env, addr, val, oi, retaddr);
525
plugin_store_cb(env, addr, oi);
526
}
527
528
void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
529
MemOpIdx oi, uintptr_t retaddr)
530
{
531
- helper_le_stl_mmu(env, addr, val, oi, retaddr);
532
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
533
+ do_st4_mmu(env, addr, val, oi, retaddr);
534
plugin_store_cb(env, addr, oi);
535
}
536
537
void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
538
MemOpIdx oi, uintptr_t retaddr)
539
{
540
- helper_le_stq_mmu(env, addr, val, oi, retaddr);
541
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
542
+ do_st8_mmu(env, addr, val, oi, retaddr);
543
plugin_store_cb(env, addr, oi);
544
}
545
546
@@ -XXX,XX +XXX,XX @@ void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
547
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
548
new_oi = make_memop_idx(mop, mmu_idx);
549
550
- helper_be_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
551
- helper_be_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
552
+ helper_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
553
+ helper_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
554
555
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
556
}
557
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
558
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
559
new_oi = make_memop_idx(mop, mmu_idx);
560
561
- helper_le_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
562
- helper_le_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
563
+ helper_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
564
+ helper_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
565
566
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
567
}
568
diff --git a/tcg/tcg.c b/tcg/tcg.c
569
index XXXXXXX..XXXXXXX 100644
570
--- a/tcg/tcg.c
571
+++ b/tcg/tcg.c
572
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
573
const TCGLdstHelperParam *p)
574
__attribute__((unused));
575
576
+#ifdef CONFIG_SOFTMMU
577
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
578
+ [MO_UB] = helper_ldub_mmu,
579
+ [MO_SB] = helper_ldsb_mmu,
580
+ [MO_UW] = helper_lduw_mmu,
581
+ [MO_SW] = helper_ldsw_mmu,
582
+ [MO_UL] = helper_ldul_mmu,
583
+ [MO_UQ] = helper_ldq_mmu,
584
+#if TCG_TARGET_REG_BITS == 64
585
+ [MO_SL] = helper_ldsl_mmu,
586
+#endif
587
+};
588
+
589
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
590
+ [MO_8] = helper_stb_mmu,
591
+ [MO_16] = helper_stw_mmu,
592
+ [MO_32] = helper_stl_mmu,
593
+ [MO_64] = helper_stq_mmu,
594
+};
595
+#endif
596
+
597
TCGContext tcg_init_ctx;
598
__thread TCGContext *tcg_ctx;
599
600
diff --git a/tcg/tci.c b/tcg/tci.c
601
index XXXXXXX..XXXXXXX 100644
602
--- a/tcg/tci.c
603
+++ b/tcg/tci.c
604
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
605
uintptr_t ra = (uintptr_t)tb_ptr;
606
607
#ifdef CONFIG_SOFTMMU
608
- switch (mop & (MO_BSWAP | MO_SSIZE)) {
609
+ switch (mop & MO_SSIZE) {
610
case MO_UB:
611
- return helper_ret_ldub_mmu(env, taddr, oi, ra);
612
+ return helper_ldub_mmu(env, taddr, oi, ra);
613
case MO_SB:
614
- return helper_ret_ldsb_mmu(env, taddr, oi, ra);
615
- case MO_LEUW:
616
- return helper_le_lduw_mmu(env, taddr, oi, ra);
617
- case MO_LESW:
618
- return helper_le_ldsw_mmu(env, taddr, oi, ra);
619
- case MO_LEUL:
620
- return helper_le_ldul_mmu(env, taddr, oi, ra);
621
- case MO_LESL:
622
- return helper_le_ldsl_mmu(env, taddr, oi, ra);
623
- case MO_LEUQ:
624
- return helper_le_ldq_mmu(env, taddr, oi, ra);
625
- case MO_BEUW:
626
- return helper_be_lduw_mmu(env, taddr, oi, ra);
627
- case MO_BESW:
628
- return helper_be_ldsw_mmu(env, taddr, oi, ra);
629
- case MO_BEUL:
630
- return helper_be_ldul_mmu(env, taddr, oi, ra);
631
- case MO_BESL:
632
- return helper_be_ldsl_mmu(env, taddr, oi, ra);
633
- case MO_BEUQ:
634
- return helper_be_ldq_mmu(env, taddr, oi, ra);
635
+ return helper_ldsb_mmu(env, taddr, oi, ra);
636
+ case MO_UW:
637
+ return helper_lduw_mmu(env, taddr, oi, ra);
638
+ case MO_SW:
639
+ return helper_ldsw_mmu(env, taddr, oi, ra);
640
+ case MO_UL:
641
+ return helper_ldul_mmu(env, taddr, oi, ra);
642
+ case MO_SL:
643
+ return helper_ldsl_mmu(env, taddr, oi, ra);
644
+ case MO_UQ:
645
+ return helper_ldq_mmu(env, taddr, oi, ra);
646
default:
647
g_assert_not_reached();
648
}
649
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
650
uintptr_t ra = (uintptr_t)tb_ptr;
651
652
#ifdef CONFIG_SOFTMMU
653
- switch (mop & (MO_BSWAP | MO_SIZE)) {
654
+ switch (mop & MO_SIZE) {
655
case MO_UB:
656
- helper_ret_stb_mmu(env, taddr, val, oi, ra);
657
+ helper_stb_mmu(env, taddr, val, oi, ra);
658
break;
659
- case MO_LEUW:
660
- helper_le_stw_mmu(env, taddr, val, oi, ra);
661
+ case MO_UW:
662
+ helper_stw_mmu(env, taddr, val, oi, ra);
663
break;
664
- case MO_LEUL:
665
- helper_le_stl_mmu(env, taddr, val, oi, ra);
666
+ case MO_UL:
667
+ helper_stl_mmu(env, taddr, val, oi, ra);
668
break;
669
- case MO_LEUQ:
670
- helper_le_stq_mmu(env, taddr, val, oi, ra);
671
- break;
672
- case MO_BEUW:
673
- helper_be_stw_mmu(env, taddr, val, oi, ra);
674
- break;
675
- case MO_BEUL:
676
- helper_be_stl_mmu(env, taddr, val, oi, ra);
677
- break;
678
- case MO_BEUQ:
679
- helper_be_stq_mmu(env, taddr, val, oi, ra);
680
+ case MO_UQ:
681
+ helper_stq_mmu(env, taddr, val, oi, ra);
682
break;
683
default:
684
g_assert_not_reached();
685
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
686
index XXXXXXX..XXXXXXX 100644
687
--- a/tcg/aarch64/tcg-target.c.inc
688
+++ b/tcg/aarch64/tcg-target.c.inc
689
@@ -XXX,XX +XXX,XX @@ typedef struct {
690
} HostAddress;
691
692
#ifdef CONFIG_SOFTMMU
693
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
694
- * MemOpIdx oi, uintptr_t ra)
695
- */
696
-static void * const qemu_ld_helpers[MO_SIZE + 1] = {
697
- [MO_8] = helper_ret_ldub_mmu,
698
-#if HOST_BIG_ENDIAN
699
- [MO_16] = helper_be_lduw_mmu,
700
- [MO_32] = helper_be_ldul_mmu,
701
- [MO_64] = helper_be_ldq_mmu,
702
-#else
703
- [MO_16] = helper_le_lduw_mmu,
704
- [MO_32] = helper_le_ldul_mmu,
705
- [MO_64] = helper_le_ldq_mmu,
706
-#endif
707
-};
708
-
709
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
710
- * uintxx_t val, MemOpIdx oi,
711
- * uintptr_t ra)
712
- */
713
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
714
- [MO_8] = helper_ret_stb_mmu,
715
-#if HOST_BIG_ENDIAN
716
- [MO_16] = helper_be_stw_mmu,
717
- [MO_32] = helper_be_stl_mmu,
718
- [MO_64] = helper_be_stq_mmu,
719
-#else
720
- [MO_16] = helper_le_stw_mmu,
721
- [MO_32] = helper_le_stl_mmu,
722
- [MO_64] = helper_le_stq_mmu,
723
-#endif
724
-};
725
-
726
static const TCGLdstHelperParam ldst_helper_param = {
727
.ntmp = 1, .tmp = { TCG_REG_TMP }
728
};
729
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
730
index XXXXXXX..XXXXXXX 100644
731
--- a/tcg/arm/tcg-target.c.inc
732
+++ b/tcg/arm/tcg-target.c.inc
733
@@ -XXX,XX +XXX,XX @@ typedef struct {
734
} HostAddress;
735
736
#ifdef CONFIG_SOFTMMU
737
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
738
- * int mmu_idx, uintptr_t ra)
739
- */
740
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
741
- [MO_UB] = helper_ret_ldub_mmu,
742
- [MO_SB] = helper_ret_ldsb_mmu,
743
-#if HOST_BIG_ENDIAN
744
- [MO_UW] = helper_be_lduw_mmu,
745
- [MO_UL] = helper_be_ldul_mmu,
746
- [MO_UQ] = helper_be_ldq_mmu,
747
- [MO_SW] = helper_be_ldsw_mmu,
748
- [MO_SL] = helper_be_ldul_mmu,
749
-#else
750
- [MO_UW] = helper_le_lduw_mmu,
751
- [MO_UL] = helper_le_ldul_mmu,
752
- [MO_UQ] = helper_le_ldq_mmu,
753
- [MO_SW] = helper_le_ldsw_mmu,
754
- [MO_SL] = helper_le_ldul_mmu,
755
-#endif
756
-};
757
-
758
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
759
- * uintxx_t val, int mmu_idx, uintptr_t ra)
760
- */
761
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
762
- [MO_8] = helper_ret_stb_mmu,
763
-#if HOST_BIG_ENDIAN
764
- [MO_16] = helper_be_stw_mmu,
765
- [MO_32] = helper_be_stl_mmu,
766
- [MO_64] = helper_be_stq_mmu,
767
-#else
768
- [MO_16] = helper_le_stw_mmu,
769
- [MO_32] = helper_le_stl_mmu,
770
- [MO_64] = helper_le_stq_mmu,
771
-#endif
772
-};
773
-
774
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
775
{
776
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
777
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
778
index XXXXXXX..XXXXXXX 100644
779
--- a/tcg/i386/tcg-target.c.inc
780
+++ b/tcg/i386/tcg-target.c.inc
781
@@ -XXX,XX +XXX,XX @@ typedef struct {
782
} HostAddress;
783
784
#if defined(CONFIG_SOFTMMU)
785
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
786
- * int mmu_idx, uintptr_t ra)
787
- */
788
-static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
789
- [MO_UB] = helper_ret_ldub_mmu,
790
- [MO_LEUW] = helper_le_lduw_mmu,
791
- [MO_LEUL] = helper_le_ldul_mmu,
792
- [MO_LEUQ] = helper_le_ldq_mmu,
793
- [MO_BEUW] = helper_be_lduw_mmu,
794
- [MO_BEUL] = helper_be_ldul_mmu,
795
- [MO_BEUQ] = helper_be_ldq_mmu,
796
-};
797
-
798
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
799
- * uintxx_t val, int mmu_idx, uintptr_t ra)
800
- */
801
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
802
- [MO_UB] = helper_ret_stb_mmu,
803
- [MO_LEUW] = helper_le_stw_mmu,
804
- [MO_LEUL] = helper_le_stl_mmu,
805
- [MO_LEUQ] = helper_le_stq_mmu,
806
- [MO_BEUW] = helper_be_stw_mmu,
807
- [MO_BEUL] = helper_be_stl_mmu,
808
- [MO_BEUQ] = helper_be_stq_mmu,
809
-};
810
-
811
/*
812
* Because i686 has no register parameters and because x86_64 has xchg
813
* to handle addr/data register overlap, we have placed all input arguments
814
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
815
}
816
817
tcg_out_ld_helper_args(s, l, &ldst_helper_param);
818
- tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
819
+ tcg_out_branch(s, 1, qemu_ld_helpers[opc & MO_SIZE]);
820
tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
821
822
tcg_out_jmp(s, l->raddr);
823
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
824
}
825
826
tcg_out_st_helper_args(s, l, &ldst_helper_param);
827
- tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
828
+ tcg_out_branch(s, 1, qemu_st_helpers[opc & MO_SIZE]);
829
830
tcg_out_jmp(s, l->raddr);
831
return true;
832
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
833
index XXXXXXX..XXXXXXX 100644
834
--- a/tcg/loongarch64/tcg-target.c.inc
835
+++ b/tcg/loongarch64/tcg-target.c.inc
836
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
837
*/
838
839
#if defined(CONFIG_SOFTMMU)
840
-/*
841
- * helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
842
- * MemOpIdx oi, uintptr_t ra)
843
- */
844
-static void * const qemu_ld_helpers[4] = {
845
- [MO_8] = helper_ret_ldub_mmu,
846
- [MO_16] = helper_le_lduw_mmu,
847
- [MO_32] = helper_le_ldul_mmu,
848
- [MO_64] = helper_le_ldq_mmu,
849
-};
850
-
851
-/*
852
- * helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
853
- * uintxx_t val, MemOpIdx oi,
854
- * uintptr_t ra)
855
- */
856
-static void * const qemu_st_helpers[4] = {
857
- [MO_8] = helper_ret_stb_mmu,
858
- [MO_16] = helper_le_stw_mmu,
859
- [MO_32] = helper_le_stl_mmu,
860
- [MO_64] = helper_le_stq_mmu,
861
-};
862
-
863
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
864
{
865
tcg_out_opc_b(s, 0);
866
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
867
index XXXXXXX..XXXXXXX 100644
868
--- a/tcg/mips/tcg-target.c.inc
869
+++ b/tcg/mips/tcg-target.c.inc
870
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
871
}
872
873
#if defined(CONFIG_SOFTMMU)
874
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
875
- [MO_UB] = helper_ret_ldub_mmu,
876
- [MO_SB] = helper_ret_ldsb_mmu,
877
-#if HOST_BIG_ENDIAN
878
- [MO_UW] = helper_be_lduw_mmu,
879
- [MO_SW] = helper_be_ldsw_mmu,
880
- [MO_UL] = helper_be_ldul_mmu,
881
- [MO_SL] = helper_be_ldsl_mmu,
882
- [MO_UQ] = helper_be_ldq_mmu,
883
-#else
884
- [MO_UW] = helper_le_lduw_mmu,
885
- [MO_SW] = helper_le_ldsw_mmu,
886
- [MO_UL] = helper_le_ldul_mmu,
887
- [MO_UQ] = helper_le_ldq_mmu,
888
- [MO_SL] = helper_le_ldsl_mmu,
889
-#endif
890
-};
891
-
892
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
893
- [MO_UB] = helper_ret_stb_mmu,
894
-#if HOST_BIG_ENDIAN
895
- [MO_UW] = helper_be_stw_mmu,
896
- [MO_UL] = helper_be_stl_mmu,
897
- [MO_UQ] = helper_be_stq_mmu,
898
-#else
899
- [MO_UW] = helper_le_stw_mmu,
900
- [MO_UL] = helper_le_stl_mmu,
901
- [MO_UQ] = helper_le_stq_mmu,
902
-#endif
903
-};
904
-
905
/* We have four temps, we might as well expose three of them. */
906
static const TCGLdstHelperParam ldst_helper_param = {
907
.ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
908
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
909
index XXXXXXX..XXXXXXX 100644
910
--- a/tcg/ppc/tcg-target.c.inc
911
+++ b/tcg/ppc/tcg-target.c.inc
912
@@ -XXX,XX +XXX,XX @@ static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
913
};
914
915
#if defined (CONFIG_SOFTMMU)
916
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
917
- * int mmu_idx, uintptr_t ra)
918
- */
919
-static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
920
- [MO_UB] = helper_ret_ldub_mmu,
921
- [MO_LEUW] = helper_le_lduw_mmu,
922
- [MO_LEUL] = helper_le_ldul_mmu,
923
- [MO_LEUQ] = helper_le_ldq_mmu,
924
- [MO_BEUW] = helper_be_lduw_mmu,
925
- [MO_BEUL] = helper_be_ldul_mmu,
926
- [MO_BEUQ] = helper_be_ldq_mmu,
927
-};
928
-
929
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
930
- * uintxx_t val, int mmu_idx, uintptr_t ra)
931
- */
932
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
933
- [MO_UB] = helper_ret_stb_mmu,
934
- [MO_LEUW] = helper_le_stw_mmu,
935
- [MO_LEUL] = helper_le_stl_mmu,
936
- [MO_LEUQ] = helper_le_stq_mmu,
937
- [MO_BEUW] = helper_be_stw_mmu,
938
- [MO_BEUL] = helper_be_stl_mmu,
939
- [MO_BEUQ] = helper_be_stq_mmu,
940
-};
941
-
942
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
943
{
944
if (arg < 0) {
945
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
946
}
947
948
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
949
- tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
950
+ tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
951
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
952
953
tcg_out_b(s, 0, lb->raddr);
954
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
955
}
956
957
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
958
- tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
959
+ tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
960
961
tcg_out_b(s, 0, lb->raddr);
962
return true;
963
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
964
index XXXXXXX..XXXXXXX 100644
965
--- a/tcg/riscv/tcg-target.c.inc
966
+++ b/tcg/riscv/tcg-target.c.inc
967
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
968
*/
969
970
#if defined(CONFIG_SOFTMMU)
971
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
972
- * MemOpIdx oi, uintptr_t ra)
973
- */
974
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
975
- [MO_UB] = helper_ret_ldub_mmu,
976
- [MO_SB] = helper_ret_ldsb_mmu,
977
-#if HOST_BIG_ENDIAN
978
- [MO_UW] = helper_be_lduw_mmu,
979
- [MO_SW] = helper_be_ldsw_mmu,
980
- [MO_UL] = helper_be_ldul_mmu,
981
-#if TCG_TARGET_REG_BITS == 64
982
- [MO_SL] = helper_be_ldsl_mmu,
983
-#endif
984
- [MO_UQ] = helper_be_ldq_mmu,
985
-#else
986
- [MO_UW] = helper_le_lduw_mmu,
987
- [MO_SW] = helper_le_ldsw_mmu,
988
- [MO_UL] = helper_le_ldul_mmu,
989
-#if TCG_TARGET_REG_BITS == 64
990
- [MO_SL] = helper_le_ldsl_mmu,
991
-#endif
992
- [MO_UQ] = helper_le_ldq_mmu,
993
-#endif
994
-};
995
-
996
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
997
- * uintxx_t val, MemOpIdx oi,
998
- * uintptr_t ra)
999
- */
1000
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
1001
- [MO_8] = helper_ret_stb_mmu,
1002
-#if HOST_BIG_ENDIAN
1003
- [MO_16] = helper_be_stw_mmu,
1004
- [MO_32] = helper_be_stl_mmu,
1005
- [MO_64] = helper_be_stq_mmu,
1006
-#else
1007
- [MO_16] = helper_le_stw_mmu,
1008
- [MO_32] = helper_le_stl_mmu,
1009
- [MO_64] = helper_le_stq_mmu,
1010
-#endif
1011
-};
1012
-
1013
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1014
{
1015
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
1016
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
1017
index XXXXXXX..XXXXXXX 100644
1018
--- a/tcg/s390x/tcg-target.c.inc
1019
+++ b/tcg/s390x/tcg-target.c.inc
1020
@@ -XXX,XX +XXX,XX @@ static const uint8_t tcg_cond_to_ltr_cond[] = {
1021
[TCG_COND_GEU] = S390_CC_ALWAYS,
1022
};
1023
1024
-#ifdef CONFIG_SOFTMMU
1025
-static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
1026
- [MO_UB] = helper_ret_ldub_mmu,
1027
- [MO_SB] = helper_ret_ldsb_mmu,
1028
- [MO_LEUW] = helper_le_lduw_mmu,
1029
- [MO_LESW] = helper_le_ldsw_mmu,
1030
- [MO_LEUL] = helper_le_ldul_mmu,
1031
- [MO_LESL] = helper_le_ldsl_mmu,
1032
- [MO_LEUQ] = helper_le_ldq_mmu,
1033
- [MO_BEUW] = helper_be_lduw_mmu,
1034
- [MO_BESW] = helper_be_ldsw_mmu,
1035
- [MO_BEUL] = helper_be_ldul_mmu,
1036
- [MO_BESL] = helper_be_ldsl_mmu,
1037
- [MO_BEUQ] = helper_be_ldq_mmu,
1038
-};
1039
-
1040
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
1041
- [MO_UB] = helper_ret_stb_mmu,
1042
- [MO_LEUW] = helper_le_stw_mmu,
1043
- [MO_LEUL] = helper_le_stl_mmu,
1044
- [MO_LEUQ] = helper_le_stq_mmu,
1045
- [MO_BEUW] = helper_be_stw_mmu,
1046
- [MO_BEUL] = helper_be_stl_mmu,
1047
- [MO_BEUQ] = helper_be_stq_mmu,
1048
-};
1049
-#endif
1050
-
1051
static const tcg_insn_unit *tb_ret_addr;
1052
uint64_t s390_facilities[3];
1053
1054
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1055
}
1056
1057
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1058
- tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1059
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1060
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1061
1062
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1063
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1064
}
1065
1066
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1067
- tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1068
+ tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1069
1070
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1071
return true;
1072
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
1073
index XXXXXXX..XXXXXXX 100644
1074
--- a/tcg/sparc64/tcg-target.c.inc
1075
+++ b/tcg/sparc64/tcg-target.c.inc
1076
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
1077
}
1078
1079
#ifdef CONFIG_SOFTMMU
1080
-static const tcg_insn_unit *qemu_ld_trampoline[(MO_SSIZE | MO_BSWAP) + 1];
1081
-static const tcg_insn_unit *qemu_st_trampoline[(MO_SIZE | MO_BSWAP) + 1];
1082
+static const tcg_insn_unit *qemu_ld_trampoline[MO_SSIZE + 1];
1083
+static const tcg_insn_unit *qemu_st_trampoline[MO_SIZE + 1];
1084
1085
static void build_trampolines(TCGContext *s)
1086
{
1087
- static void * const qemu_ld_helpers[] = {
1088
- [MO_UB] = helper_ret_ldub_mmu,
1089
- [MO_SB] = helper_ret_ldsb_mmu,
1090
- [MO_LEUW] = helper_le_lduw_mmu,
1091
- [MO_LESW] = helper_le_ldsw_mmu,
1092
- [MO_LEUL] = helper_le_ldul_mmu,
1093
- [MO_LEUQ] = helper_le_ldq_mmu,
1094
- [MO_BEUW] = helper_be_lduw_mmu,
1095
- [MO_BESW] = helper_be_ldsw_mmu,
1096
- [MO_BEUL] = helper_be_ldul_mmu,
1097
- [MO_BEUQ] = helper_be_ldq_mmu,
1098
- };
1099
- static void * const qemu_st_helpers[] = {
1100
- [MO_UB] = helper_ret_stb_mmu,
1101
- [MO_LEUW] = helper_le_stw_mmu,
1102
- [MO_LEUL] = helper_le_stl_mmu,
1103
- [MO_LEUQ] = helper_le_stq_mmu,
1104
- [MO_BEUW] = helper_be_stw_mmu,
1105
- [MO_BEUL] = helper_be_stl_mmu,
1106
- [MO_BEUQ] = helper_be_stq_mmu,
1107
- };
1108
-
1109
int i;
1110
1111
for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
1112
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
1113
/* We use the helpers to extend SB and SW data, leaving the case
1114
of SL needing explicit extending below. */
1115
if ((memop & MO_SSIZE) == MO_SL) {
1116
- func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1117
+ func = qemu_ld_trampoline[MO_UL];
1118
} else {
1119
- func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
1120
+ func = qemu_ld_trampoline[memop & MO_SSIZE];
1121
}
1122
tcg_debug_assert(func != NULL);
1123
tcg_out_call_nodelay(s, func, false);
1124
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
1125
tcg_out_movext(s, (memop & MO_SIZE) == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
1126
TCG_REG_O2, data_type, memop & MO_SIZE, data);
1127
1128
- func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1129
+ func = qemu_st_trampoline[memop & MO_SIZE];
1130
tcg_debug_assert(func != NULL);
1131
tcg_out_call_nodelay(s, func, false);
1132
/* delay slot */
1133
--
1134
2.34.1
1135
1136
New patch
1
TCG backends may need to defer to a helper to implement
2
the atomicity required by a given operation. Mirror the
3
interface used in system mode.
1
4
5
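Condensed for orientation (the full hunks follow; asserts elided), each new
user-only helper keeps the same shape: a host-endian inner routine does the
lookup and the atomicity-honoring access, and the helper byte-swaps according
to the MemOp:

static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
                              MemOp mop, uintptr_t ra)
{
    void *haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
    uint16_t ret = load_atom_2(env, ra, haddr, mop);   /* honors MO_ATOM_* */

    clear_helper_retaddr();
    return ret;
}

tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
                                 MemOpIdx oi, uintptr_t ra)
{
    MemOp mop = get_memop(oi);
    uint16_t ret = do_ld2_he_mmu(env, addr, mop, ra);

    return (mop & MO_BSWAP) ? bswap16(ret) : ret;
}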
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
include/tcg/tcg-ldst.h | 6 +-
9
accel/tcg/user-exec.c | 393 ++++++++++++++++++++++++++++-------------
10
tcg/tcg.c | 6 +-
11
3 files changed, 278 insertions(+), 127 deletions(-)
12
13
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-ldst.h
16
+++ b/include/tcg/tcg-ldst.h
17
@@ -XXX,XX +XXX,XX @@
18
#ifndef TCG_LDST_H
19
#define TCG_LDST_H
20
21
-#ifdef CONFIG_SOFTMMU
22
-
23
/* Value zero-extended to tcg register size. */
24
tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
25
MemOpIdx oi, uintptr_t retaddr);
26
@@ -XXX,XX +XXX,XX @@ void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
27
void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
28
MemOpIdx oi, uintptr_t retaddr);
29
30
-#else
31
+#ifdef CONFIG_USER_ONLY
32
33
G_NORETURN void helper_unaligned_ld(CPUArchState *env, target_ulong addr);
34
G_NORETURN void helper_unaligned_st(CPUArchState *env, target_ulong addr);
35
36
-#endif /* CONFIG_SOFTMMU */
37
+#endif /* CONFIG_USER_ONLY */
38
#endif /* TCG_LDST_H */
39
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/accel/tcg/user-exec.c
42
+++ b/accel/tcg/user-exec.c
43
@@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong last) { }
44
45
/* The softmmu versions of these helpers are in cputlb.c. */
46
47
-/*
48
- * Verify that we have passed the correct MemOp to the correct function.
49
- *
50
- * We could present one function to target code, and dispatch based on
51
- * the MemOp, but so far we have worked hard to avoid an indirect function
52
- * call along the memory path.
53
- */
54
-static void validate_memop(MemOpIdx oi, MemOp expected)
55
-{
56
-#ifdef CONFIG_DEBUG_TCG
57
- MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP);
58
- assert(have == expected);
59
-#endif
60
-}
61
-
62
void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
63
{
64
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
65
@@ -XXX,XX +XXX,XX @@ void helper_unaligned_st(CPUArchState *env, target_ulong addr)
66
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
67
}
68
69
-static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
70
- MemOpIdx oi, uintptr_t ra, MMUAccessType type)
71
+static void *cpu_mmu_lookup(CPUArchState *env, abi_ptr addr,
72
+ MemOp mop, uintptr_t ra, MMUAccessType type)
73
{
74
- MemOp mop = get_memop(oi);
75
int a_bits = get_alignment_bits(mop);
76
void *ret;
77
78
@@ -XXX,XX +XXX,XX @@ static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
79
80
#include "ldst_atomicity.c.inc"
81
82
-uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
83
- MemOpIdx oi, uintptr_t ra)
84
+static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
85
+ MemOp mop, uintptr_t ra)
86
{
87
void *haddr;
88
uint8_t ret;
89
90
- validate_memop(oi, MO_UB);
91
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
92
+ tcg_debug_assert((mop & MO_SIZE) == MO_8);
93
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
94
ret = ldub_p(haddr);
95
clear_helper_retaddr();
96
+ return ret;
97
+}
98
+
99
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
100
+ MemOpIdx oi, uintptr_t ra)
101
+{
102
+ return do_ld1_mmu(env, addr, get_memop(oi), ra);
103
+}
104
+
105
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
106
+ MemOpIdx oi, uintptr_t ra)
107
+{
108
+ return (int8_t)do_ld1_mmu(env, addr, get_memop(oi), ra);
109
+}
110
+
111
+uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
112
+ MemOpIdx oi, uintptr_t ra)
113
+{
114
+ uint8_t ret = do_ld1_mmu(env, addr, get_memop(oi), ra);
115
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
116
return ret;
117
}
118
119
+static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
120
+ MemOp mop, uintptr_t ra)
121
+{
122
+ void *haddr;
123
+ uint16_t ret;
124
+
125
+ tcg_debug_assert((mop & MO_SIZE) == MO_16);
126
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
127
+ ret = load_atom_2(env, ra, haddr, mop);
128
+ clear_helper_retaddr();
129
+ return ret;
130
+}
131
+
132
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
133
+ MemOpIdx oi, uintptr_t ra)
134
+{
135
+ MemOp mop = get_memop(oi);
136
+ uint16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
137
+
138
+ if (mop & MO_BSWAP) {
139
+ ret = bswap16(ret);
140
+ }
141
+ return ret;
142
+}
143
+
144
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
145
+ MemOpIdx oi, uintptr_t ra)
146
+{
147
+ MemOp mop = get_memop(oi);
148
+ int16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
149
+
150
+ if (mop & MO_BSWAP) {
151
+ ret = bswap16(ret);
152
+ }
153
+ return ret;
154
+}
155
+
156
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
157
MemOpIdx oi, uintptr_t ra)
158
{
159
- void *haddr;
160
+ MemOp mop = get_memop(oi);
161
uint16_t ret;
162
163
- validate_memop(oi, MO_BEUW);
164
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
165
- ret = load_atom_2(env, ra, haddr, get_memop(oi));
166
- clear_helper_retaddr();
167
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
168
+ ret = do_ld2_he_mmu(env, addr, mop, ra);
169
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
170
return cpu_to_be16(ret);
171
}
172
173
-uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
174
- MemOpIdx oi, uintptr_t ra)
175
-{
176
- void *haddr;
177
- uint32_t ret;
178
-
179
- validate_memop(oi, MO_BEUL);
180
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
181
- ret = load_atom_4(env, ra, haddr, get_memop(oi));
182
- clear_helper_retaddr();
183
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
184
- return cpu_to_be32(ret);
185
-}
186
-
187
-uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
188
- MemOpIdx oi, uintptr_t ra)
189
-{
190
- void *haddr;
191
- uint64_t ret;
192
-
193
- validate_memop(oi, MO_BEUQ);
194
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
195
- ret = load_atom_8(env, ra, haddr, get_memop(oi));
196
- clear_helper_retaddr();
197
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
198
- return cpu_to_be64(ret);
199
-}
200
-
201
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
202
MemOpIdx oi, uintptr_t ra)
203
{
204
- void *haddr;
205
+ MemOp mop = get_memop(oi);
206
uint16_t ret;
207
208
- validate_memop(oi, MO_LEUW);
209
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
210
- ret = load_atom_2(env, ra, haddr, get_memop(oi));
211
- clear_helper_retaddr();
212
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
213
+ ret = do_ld2_he_mmu(env, addr, mop, ra);
214
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
215
return cpu_to_le16(ret);
216
}
217
218
+static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
219
+ MemOp mop, uintptr_t ra)
220
+{
221
+ void *haddr;
222
+ uint32_t ret;
223
+
224
+ tcg_debug_assert((mop & MO_SIZE) == MO_32);
225
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
226
+ ret = load_atom_4(env, ra, haddr, mop);
227
+ clear_helper_retaddr();
228
+ return ret;
229
+}
230
+
231
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
232
+ MemOpIdx oi, uintptr_t ra)
233
+{
234
+ MemOp mop = get_memop(oi);
235
+ uint32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
236
+
237
+ if (mop & MO_BSWAP) {
238
+ ret = bswap32(ret);
239
+ }
240
+ return ret;
241
+}
242
+
243
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
244
+ MemOpIdx oi, uintptr_t ra)
245
+{
246
+ MemOp mop = get_memop(oi);
247
+ int32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
248
+
249
+ if (mop & MO_BSWAP) {
250
+ ret = bswap32(ret);
251
+ }
252
+ return ret;
253
+}
254
+
255
+uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
256
+ MemOpIdx oi, uintptr_t ra)
257
+{
258
+ MemOp mop = get_memop(oi);
259
+ uint32_t ret;
260
+
261
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
262
+ ret = do_ld4_he_mmu(env, addr, mop, ra);
263
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
264
+ return cpu_to_be32(ret);
265
+}
266
+
267
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
268
MemOpIdx oi, uintptr_t ra)
269
{
270
- void *haddr;
271
+ MemOp mop = get_memop(oi);
272
uint32_t ret;
273
274
- validate_memop(oi, MO_LEUL);
275
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
276
- ret = load_atom_4(env, ra, haddr, get_memop(oi));
277
- clear_helper_retaddr();
278
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
279
+ ret = do_ld4_he_mmu(env, addr, mop, ra);
280
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
281
return cpu_to_le32(ret);
282
}
283
284
+static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
285
+ MemOp mop, uintptr_t ra)
286
+{
287
+ void *haddr;
288
+ uint64_t ret;
289
+
290
+ tcg_debug_assert((mop & MO_SIZE) == MO_64);
291
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
292
+ ret = load_atom_8(env, ra, haddr, mop);
293
+ clear_helper_retaddr();
294
+ return ret;
295
+}
296
+
297
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
298
+ MemOpIdx oi, uintptr_t ra)
299
+{
300
+ MemOp mop = get_memop(oi);
301
+ uint64_t ret = do_ld8_he_mmu(env, addr, mop, ra);
302
+
303
+ if (mop & MO_BSWAP) {
304
+ ret = bswap64(ret);
305
+ }
306
+ return ret;
307
+}
308
+
309
+uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
310
+ MemOpIdx oi, uintptr_t ra)
311
+{
312
+ MemOp mop = get_memop(oi);
313
+ uint64_t ret;
314
+
315
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
316
+ ret = do_ld8_he_mmu(env, addr, mop, ra);
317
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
318
+ return cpu_to_be64(ret);
319
+}
320
+
321
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
322
MemOpIdx oi, uintptr_t ra)
323
{
324
- void *haddr;
325
+ MemOp mop = get_memop(oi);
326
uint64_t ret;
327
328
- validate_memop(oi, MO_LEUQ);
329
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
330
- ret = load_atom_8(env, ra, haddr, get_memop(oi));
331
- clear_helper_retaddr();
332
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
333
+ ret = do_ld8_he_mmu(env, addr, mop, ra);
334
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
335
return cpu_to_le64(ret);
336
}
337
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
338
void *haddr;
339
Int128 ret;
340
341
- validate_memop(oi, MO_128 | MO_BE);
342
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
343
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
344
memcpy(&ret, haddr, 16);
345
clear_helper_retaddr();
346
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
347
void *haddr;
348
Int128 ret;
349
350
- validate_memop(oi, MO_128 | MO_LE);
351
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
352
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
353
memcpy(&ret, haddr, 16);
354
clear_helper_retaddr();
355
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
356
return ret;
357
}
358
359
-void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
360
- MemOpIdx oi, uintptr_t ra)
361
+static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
362
+ MemOp mop, uintptr_t ra)
363
{
364
void *haddr;
365
366
- validate_memop(oi, MO_UB);
367
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
368
+ tcg_debug_assert((mop & MO_SIZE) == MO_8);
369
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
370
stb_p(haddr, val);
371
clear_helper_retaddr();
372
+}
373
+
374
+void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
375
+ MemOpIdx oi, uintptr_t ra)
376
+{
377
+ do_st1_mmu(env, addr, val, get_memop(oi), ra);
378
+}
379
+
380
+void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
381
+ MemOpIdx oi, uintptr_t ra)
382
+{
383
+ do_st1_mmu(env, addr, val, get_memop(oi), ra);
384
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
385
}
386
387
+static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
388
+ MemOp mop, uintptr_t ra)
389
+{
390
+ void *haddr;
391
+
392
+ tcg_debug_assert((mop & MO_SIZE) == MO_16);
393
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
394
+ store_atom_2(env, ra, haddr, mop, val);
395
+ clear_helper_retaddr();
396
+}
397
+
398
+void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
399
+ MemOpIdx oi, uintptr_t ra)
400
+{
401
+ MemOp mop = get_memop(oi);
402
+
403
+ if (mop & MO_BSWAP) {
404
+ val = bswap16(val);
405
+ }
406
+ do_st2_he_mmu(env, addr, val, mop, ra);
407
+}
408
+
409
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
410
MemOpIdx oi, uintptr_t ra)
411
{
412
- void *haddr;
413
+ MemOp mop = get_memop(oi);
414
415
- validate_memop(oi, MO_BEUW);
416
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
417
- store_atom_2(env, ra, haddr, get_memop(oi), be16_to_cpu(val));
418
- clear_helper_retaddr();
419
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
420
-}
421
-
422
-void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
423
- MemOpIdx oi, uintptr_t ra)
424
-{
425
- void *haddr;
426
-
427
- validate_memop(oi, MO_BEUL);
428
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
429
- store_atom_4(env, ra, haddr, get_memop(oi), be32_to_cpu(val));
430
- clear_helper_retaddr();
431
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
432
-}
433
-
434
-void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
435
- MemOpIdx oi, uintptr_t ra)
436
-{
437
- void *haddr;
438
-
439
- validate_memop(oi, MO_BEUQ);
440
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
441
- store_atom_8(env, ra, haddr, get_memop(oi), be64_to_cpu(val));
442
- clear_helper_retaddr();
443
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
444
+ do_st2_he_mmu(env, addr, be16_to_cpu(val), mop, ra);
445
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
446
}
447
448
void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
449
MemOpIdx oi, uintptr_t ra)
450
+{
451
+ MemOp mop = get_memop(oi);
452
+
453
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
454
+ do_st2_he_mmu(env, addr, le16_to_cpu(val), mop, ra);
455
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
456
+}
457
+
458
+static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
459
+ MemOp mop, uintptr_t ra)
460
{
461
void *haddr;
462
463
- validate_memop(oi, MO_LEUW);
464
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
465
- store_atom_2(env, ra, haddr, get_memop(oi), le16_to_cpu(val));
466
+ tcg_debug_assert((mop & MO_SIZE) == MO_32);
467
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
468
+ store_atom_4(env, ra, haddr, mop, val);
469
clear_helper_retaddr();
470
+}
471
+
472
+void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
473
+ MemOpIdx oi, uintptr_t ra)
474
+{
475
+ MemOp mop = get_memop(oi);
476
+
477
+ if (mop & MO_BSWAP) {
478
+ val = bswap32(val);
479
+ }
480
+ do_st4_he_mmu(env, addr, val, mop, ra);
481
+}
482
+
483
+void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
484
+ MemOpIdx oi, uintptr_t ra)
485
+{
486
+ MemOp mop = get_memop(oi);
487
+
488
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
489
+ do_st4_he_mmu(env, addr, be32_to_cpu(val), mop, ra);
490
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
491
}
492
493
void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
494
MemOpIdx oi, uintptr_t ra)
495
+{
496
+ MemOp mop = get_memop(oi);
497
+
498
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
499
+ do_st4_he_mmu(env, addr, le32_to_cpu(val), mop, ra);
500
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
501
+}
502
+
503
+static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
504
+ MemOp mop, uintptr_t ra)
505
{
506
void *haddr;
507
508
- validate_memop(oi, MO_LEUL);
509
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
510
- store_atom_4(env, ra, haddr, get_memop(oi), le32_to_cpu(val));
511
+ tcg_debug_assert((mop & MO_SIZE) == MO_64);
512
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
513
+ store_atom_8(env, ra, haddr, mop, val);
514
clear_helper_retaddr();
515
+}
516
+
517
+void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
518
+ MemOpIdx oi, uintptr_t ra)
519
+{
520
+ MemOp mop = get_memop(oi);
521
+
522
+ if (mop & MO_BSWAP) {
523
+ val = bswap64(val);
524
+ }
525
+ do_st8_he_mmu(env, addr, val, mop, ra);
526
+}
527
+
528
+void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
529
+ MemOpIdx oi, uintptr_t ra)
530
+{
531
+ MemOp mop = get_memop(oi);
532
+
533
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
534
+ do_st8_he_mmu(env, addr, cpu_to_be64(val), mop, ra);
535
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
536
}
537
538
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
539
MemOpIdx oi, uintptr_t ra)
540
{
541
- void *haddr;
542
+ MemOp mop = get_memop(oi);
543
544
- validate_memop(oi, MO_LEUQ);
545
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
546
- store_atom_8(env, ra, haddr, get_memop(oi), le64_to_cpu(val));
547
- clear_helper_retaddr();
548
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
549
+ do_st8_he_mmu(env, addr, cpu_to_le64(val), mop, ra);
550
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
551
}
552
553
@@ -XXX,XX +XXX,XX @@ void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
554
{
555
void *haddr;
556
557
- validate_memop(oi, MO_128 | MO_BE);
558
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
559
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
560
if (!HOST_BIG_ENDIAN) {
561
val = bswap128(val);
562
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
563
{
564
void *haddr;
565
566
- validate_memop(oi, MO_128 | MO_LE);
567
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
568
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
569
if (HOST_BIG_ENDIAN) {
570
val = bswap128(val);
571
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
572
void *haddr;
573
uint64_t ret;
574
575
- validate_memop(oi, MO_BEUQ);
576
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
577
ret = ldq_p(haddr);
578
clear_helper_retaddr();
579
diff --git a/tcg/tcg.c b/tcg/tcg.c
580
index XXXXXXX..XXXXXXX 100644
581
--- a/tcg/tcg.c
582
+++ b/tcg/tcg.c
583
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
584
const TCGLdstHelperParam *p)
585
__attribute__((unused));
586
587
-#ifdef CONFIG_SOFTMMU
588
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
589
+static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
590
[MO_UB] = helper_ldub_mmu,
591
[MO_SB] = helper_ldsb_mmu,
592
[MO_UW] = helper_lduw_mmu,
593
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
594
#endif
595
};
596
597
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
598
+static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
599
[MO_8] = helper_stb_mmu,
600
[MO_16] = helper_stw_mmu,
601
[MO_32] = helper_stl_mmu,
602
[MO_64] = helper_stq_mmu,
603
};
604
-#endif
605
606
TCGContext tcg_init_ctx;
607
__thread TCGContext *tcg_ctx;
608
--
609
2.34.1
New patch
1
We can now fold these two pieces of code.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/tci.c | 89 -------------------------------------------------------
7
1 file changed, 89 deletions(-)
8
9
diff --git a/tcg/tci.c b/tcg/tci.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/tci.c
12
+++ b/tcg/tci.c
13
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
14
MemOp mop = get_memop(oi);
15
uintptr_t ra = (uintptr_t)tb_ptr;
16
17
-#ifdef CONFIG_SOFTMMU
18
switch (mop & MO_SSIZE) {
19
case MO_UB:
20
return helper_ldub_mmu(env, taddr, oi, ra);
21
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
22
default:
23
g_assert_not_reached();
24
}
25
-#else
26
- void *haddr = g2h(env_cpu(env), taddr);
27
- unsigned a_mask = (1u << get_alignment_bits(mop)) - 1;
28
- uint64_t ret;
29
-
30
- set_helper_retaddr(ra);
31
- if (taddr & a_mask) {
32
- helper_unaligned_ld(env, taddr);
33
- }
34
- switch (mop & (MO_BSWAP | MO_SSIZE)) {
35
- case MO_UB:
36
- ret = ldub_p(haddr);
37
- break;
38
- case MO_SB:
39
- ret = ldsb_p(haddr);
40
- break;
41
- case MO_LEUW:
42
- ret = lduw_le_p(haddr);
43
- break;
44
- case MO_LESW:
45
- ret = ldsw_le_p(haddr);
46
- break;
47
- case MO_LEUL:
48
- ret = (uint32_t)ldl_le_p(haddr);
49
- break;
50
- case MO_LESL:
51
- ret = (int32_t)ldl_le_p(haddr);
52
- break;
53
- case MO_LEUQ:
54
- ret = ldq_le_p(haddr);
55
- break;
56
- case MO_BEUW:
57
- ret = lduw_be_p(haddr);
58
- break;
59
- case MO_BESW:
60
- ret = ldsw_be_p(haddr);
61
- break;
62
- case MO_BEUL:
63
- ret = (uint32_t)ldl_be_p(haddr);
64
- break;
65
- case MO_BESL:
66
- ret = (int32_t)ldl_be_p(haddr);
67
- break;
68
- case MO_BEUQ:
69
- ret = ldq_be_p(haddr);
70
- break;
71
- default:
72
- g_assert_not_reached();
73
- }
74
- clear_helper_retaddr();
75
- return ret;
76
-#endif
77
}
78
79
static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
80
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
81
MemOp mop = get_memop(oi);
82
uintptr_t ra = (uintptr_t)tb_ptr;
83
84
-#ifdef CONFIG_SOFTMMU
85
switch (mop & MO_SIZE) {
86
case MO_UB:
87
helper_stb_mmu(env, taddr, val, oi, ra);
88
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
89
default:
90
g_assert_not_reached();
91
}
92
-#else
93
- void *haddr = g2h(env_cpu(env), taddr);
94
- unsigned a_mask = (1u << get_alignment_bits(mop)) - 1;
95
-
96
- set_helper_retaddr(ra);
97
- if (taddr & a_mask) {
98
- helper_unaligned_st(env, taddr);
99
- }
100
- switch (mop & (MO_BSWAP | MO_SIZE)) {
101
- case MO_UB:
102
- stb_p(haddr, val);
103
- break;
104
- case MO_LEUW:
105
- stw_le_p(haddr, val);
106
- break;
107
- case MO_LEUL:
108
- stl_le_p(haddr, val);
109
- break;
110
- case MO_LEUQ:
111
- stq_le_p(haddr, val);
112
- break;
113
- case MO_BEUW:
114
- stw_be_p(haddr, val);
115
- break;
116
- case MO_BEUL:
117
- stl_be_p(haddr, val);
118
- break;
119
- case MO_BEUQ:
120
- stq_be_p(haddr, val);
121
- break;
122
- default:
123
- g_assert_not_reached();
124
- }
125
- clear_helper_retaddr();
126
-#endif
127
}
128
129
#if TCG_TARGET_REG_BITS == 64
130
--
131
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
accel/tcg/tcg-runtime.h | 3 +
5
include/tcg/tcg-ldst.h | 4 +
6
accel/tcg/cputlb.c | 399 +++++++++++++++++++++++++--------
7
accel/tcg/user-exec.c | 94 ++++++--
8
tcg/tcg-op.c | 173 +++++++++-----
9
accel/tcg/ldst_atomicity.c.inc | 184 +++++++++++++++
10
6 files changed, 679 insertions(+), 178 deletions(-)
1
11
12
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/tcg-runtime.h
15
+++ b/accel/tcg/tcg-runtime.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
17
DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
18
#endif /* IN_HELPER_PROTO */
19
20
+DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, tl, i32)
21
+DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, tl, i128, i32)
22
+
23
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
24
i32, env, tl, i32, i32, i32)
25
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
26
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
27
index XXXXXXX..XXXXXXX 100644
28
--- a/include/tcg/tcg-ldst.h
29
+++ b/include/tcg/tcg-ldst.h
30
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
31
MemOpIdx oi, uintptr_t retaddr);
32
uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
33
MemOpIdx oi, uintptr_t retaddr);
34
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
35
+ MemOpIdx oi, uintptr_t retaddr);
36
37
/* Value sign-extended to tcg register size. */
38
tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
39
@@ -XXX,XX +XXX,XX @@ void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
40
MemOpIdx oi, uintptr_t retaddr);
41
void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
42
MemOpIdx oi, uintptr_t retaddr);
43
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
44
+ MemOpIdx oi, uintptr_t retaddr);
45
46
#ifdef CONFIG_USER_ONLY
47
48
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/accel/tcg/cputlb.c
51
+++ b/accel/tcg/cputlb.c
52
@@ -XXX,XX +XXX,XX @@
53
#include "qemu/plugin-memory.h"
54
#endif
55
#include "tcg/tcg-ldst.h"
56
+#include "exec/helper-proto.h"
57
58
/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
59
/* #define DEBUG_TLB */
60
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_whole_be8(CPUArchState *env, uintptr_t ra,
61
return (ret_be << (p->size * 8)) | x;
62
}
63
64
+/**
65
+ * do_ld_parts_be16
66
+ * @p: translation parameters
67
+ * @ret_be: accumulated data
68
+ *
69
+ * As do_ld_bytes_beN, but with one atomic load.
70
+ * 16 aligned bytes are guaranteed to cover the load.
71
+ */
72
+static Int128 do_ld_whole_be16(CPUArchState *env, uintptr_t ra,
73
+ MMULookupPageData *p, uint64_t ret_be)
74
+{
75
+ int o = p->addr & 15;
76
+ Int128 x, y = load_atomic16_or_exit(env, ra, p->haddr - o);
77
+ int size = p->size;
78
+
79
+ if (!HOST_BIG_ENDIAN) {
80
+ y = bswap128(y);
81
+ }
82
+ y = int128_lshift(y, o * 8);
83
+ y = int128_urshift(y, (16 - size) * 8);
84
+ x = int128_make64(ret_be);
85
+ x = int128_lshift(x, size * 8);
86
+ return int128_or(x, y);
87
+}
88
+
89
/*
90
* Wrapper for the above.
91
*/
92
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
93
}
94
}
95
96
+/*
97
+ * Wrapper for the above, for 8 < size < 16.
98
+ */
99
+static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p,
100
+ uint64_t a, int mmu_idx, MemOp mop, uintptr_t ra)
101
+{
102
+ int size = p->size;
103
+ uint64_t b;
104
+ MemOp atom;
105
+
106
+ if (unlikely(p->flags & TLB_MMIO)) {
107
+ p->size = size - 8;
108
+ a = do_ld_mmio_beN(env, p, a, mmu_idx, MMU_DATA_LOAD, ra);
109
+ p->addr += p->size;
110
+ p->size = 8;
111
+ b = do_ld_mmio_beN(env, p, 0, mmu_idx, MMU_DATA_LOAD, ra);
112
+ return int128_make128(b, a);
113
+ }
114
+
115
+ /*
116
+ * It is a given that we cross a page and therefore there is no
117
+ * atomicity for the load as a whole, but subobjects may need attention.
118
+ */
119
+ atom = mop & MO_ATOM_MASK;
120
+ switch (atom) {
121
+ case MO_ATOM_SUBALIGN:
122
+ p->size = size - 8;
123
+ a = do_ld_parts_beN(p, a);
124
+ p->haddr += size - 8;
125
+ p->size = 8;
126
+ b = do_ld_parts_beN(p, 0);
127
+ break;
128
+
129
+ case MO_ATOM_WITHIN16_PAIR:
130
+ /* Since size > 8, this is the half that must be atomic. */
131
+ return do_ld_whole_be16(env, ra, p, a);
132
+
133
+ case MO_ATOM_IFALIGN_PAIR:
134
+ /*
135
+ * Since size > 8, both halves are misaligned,
136
+ * and so neither is atomic.
137
+ */
138
+ case MO_ATOM_IFALIGN:
139
+ case MO_ATOM_WITHIN16:
140
+ case MO_ATOM_NONE:
141
+ p->size = size - 8;
142
+ a = do_ld_bytes_beN(p, a);
143
+ b = ldq_be_p(p->haddr + size - 8);
144
+ break;
145
+
146
+ default:
147
+ g_assert_not_reached();
148
+ }
149
+
150
+ return int128_make128(b, a);
151
+}
152
+
153
static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
154
MMUAccessType type, uintptr_t ra)
155
{
156
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
157
return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
158
}
159
160
+static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr,
161
+ MemOpIdx oi, uintptr_t ra)
162
+{
163
+ MMULookupLocals l;
164
+ bool crosspage;
165
+ uint64_t a, b;
166
+ Int128 ret;
167
+ int first;
168
+
169
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
170
+ if (likely(!crosspage)) {
171
+ /* Perform the load host endian. */
172
+ if (unlikely(l.page[0].flags & TLB_MMIO)) {
173
+ QEMU_IOTHREAD_LOCK_GUARD();
174
+ a = io_readx(env, l.page[0].full, l.mmu_idx, addr,
175
+ ra, MMU_DATA_LOAD, MO_64);
176
+ b = io_readx(env, l.page[0].full, l.mmu_idx, addr + 8,
177
+ ra, MMU_DATA_LOAD, MO_64);
178
+ ret = int128_make128(HOST_BIG_ENDIAN ? b : a,
179
+ HOST_BIG_ENDIAN ? a : b);
180
+ } else {
181
+ ret = load_atom_16(env, ra, l.page[0].haddr, l.memop);
182
+ }
183
+ if (l.memop & MO_BSWAP) {
184
+ ret = bswap128(ret);
185
+ }
186
+ return ret;
187
+ }
188
+
189
+ first = l.page[0].size;
190
+ if (first == 8) {
191
+ MemOp mop8 = (l.memop & ~MO_SIZE) | MO_64;
192
+
193
+ a = do_ld_8(env, &l.page[0], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
194
+ b = do_ld_8(env, &l.page[1], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
195
+ if ((mop8 & MO_BSWAP) == MO_LE) {
196
+ ret = int128_make128(a, b);
197
+ } else {
198
+ ret = int128_make128(b, a);
199
+ }
200
+ return ret;
201
+ }
202
+
203
+ if (first < 8) {
204
+ a = do_ld_beN(env, &l.page[0], 0, l.mmu_idx,
205
+ MMU_DATA_LOAD, l.memop, ra);
206
+ ret = do_ld16_beN(env, &l.page[1], a, l.mmu_idx, l.memop, ra);
207
+ } else {
208
+ ret = do_ld16_beN(env, &l.page[0], 0, l.mmu_idx, l.memop, ra);
209
+ b = int128_getlo(ret);
210
+ ret = int128_lshift(ret, l.page[1].size * 8);
211
+ a = int128_gethi(ret);
212
+ b = do_ld_beN(env, &l.page[1], b, l.mmu_idx,
213
+ MMU_DATA_LOAD, l.memop, ra);
214
+ ret = int128_make128(b, a);
215
+ }
216
+ if ((l.memop & MO_BSWAP) == MO_LE) {
217
+ ret = bswap128(ret);
218
+ }
219
+ return ret;
220
+}
221
+
222
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
223
+ uint32_t oi, uintptr_t retaddr)
224
+{
225
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
226
+ return do_ld16_mmu(env, addr, oi, retaddr);
227
+}
228
+
229
+Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, uint32_t oi)
230
+{
231
+ return helper_ld16_mmu(env, addr, oi, GETPC());
232
+}
233
+
234
/*
235
* Load helpers for cpu_ldst.h.
236
*/
237
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
238
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
239
MemOpIdx oi, uintptr_t ra)
240
{
241
- MemOp mop = get_memop(oi);
242
- int mmu_idx = get_mmuidx(oi);
243
- MemOpIdx new_oi;
244
- unsigned a_bits;
245
- uint64_t h, l;
246
+ Int128 ret;
247
248
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
249
- a_bits = get_alignment_bits(mop);
250
-
251
- /* Handle CPU specific unaligned behaviour */
252
- if (addr & ((1 << a_bits) - 1)) {
253
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
254
- mmu_idx, ra);
255
- }
256
-
257
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
258
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
259
- new_oi = make_memop_idx(mop, mmu_idx);
260
-
261
- h = helper_ldq_mmu(env, addr, new_oi, ra);
262
- l = helper_ldq_mmu(env, addr + 8, new_oi, ra);
263
-
264
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
265
- return int128_make128(l, h);
266
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
267
+ ret = do_ld16_mmu(env, addr, oi, ra);
268
+ plugin_load_cb(env, addr, oi);
269
+ return ret;
270
}
271
272
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
273
MemOpIdx oi, uintptr_t ra)
274
{
275
- MemOp mop = get_memop(oi);
276
- int mmu_idx = get_mmuidx(oi);
277
- MemOpIdx new_oi;
278
- unsigned a_bits;
279
- uint64_t h, l;
280
+ Int128 ret;
281
282
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
283
- a_bits = get_alignment_bits(mop);
284
-
285
- /* Handle CPU specific unaligned behaviour */
286
- if (addr & ((1 << a_bits) - 1)) {
287
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
288
- mmu_idx, ra);
289
- }
290
-
291
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
292
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
293
- new_oi = make_memop_idx(mop, mmu_idx);
294
-
295
- l = helper_ldq_mmu(env, addr, new_oi, ra);
296
- h = helper_ldq_mmu(env, addr + 8, new_oi, ra);
297
-
298
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
299
- return int128_make128(l, h);
300
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
301
+ ret = do_ld16_mmu(env, addr, oi, ra);
302
+ plugin_load_cb(env, addr, oi);
303
+ return ret;
304
}
305
306
/*
307
@@ -XXX,XX +XXX,XX @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
308
}
309
}
310
311
+/*
312
+ * Wrapper for the above, for 8 < size < 16.
313
+ */
314
+static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
315
+ Int128 val_le, int mmu_idx,
316
+ MemOp mop, uintptr_t ra)
317
+{
318
+ int size = p->size;
319
+ MemOp atom;
320
+
321
+ if (unlikely(p->flags & TLB_MMIO)) {
322
+ p->size = 8;
323
+ do_st_mmio_leN(env, p, int128_getlo(val_le), mmu_idx, ra);
324
+ p->size = size - 8;
325
+ p->addr += 8;
326
+ return do_st_mmio_leN(env, p, int128_gethi(val_le), mmu_idx, ra);
327
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
328
+ return int128_gethi(val_le) >> ((size - 8) * 8);
329
+ }
330
+
331
+ /*
332
+ * It is a given that we cross a page and therefore there is no atomicity
333
+ * for the store as a whole, but subobjects may need attention.
334
+ */
335
+ atom = mop & MO_ATOM_MASK;
336
+ switch (atom) {
337
+ case MO_ATOM_SUBALIGN:
338
+ store_parts_leN(p->haddr, 8, int128_getlo(val_le));
339
+ return store_parts_leN(p->haddr + 8, p->size - 8,
340
+ int128_gethi(val_le));
341
+
342
+ case MO_ATOM_WITHIN16_PAIR:
343
+ /* Since size > 8, this is the half that must be atomic. */
344
+ if (!HAVE_al16) {
345
+ cpu_loop_exit_atomic(env_cpu(env), ra);
346
+ }
347
+ return store_whole_le16(p->haddr, p->size, val_le);
348
+
349
+ case MO_ATOM_IFALIGN_PAIR:
350
+ /*
351
+ * Since size > 8, both halves are misaligned,
352
+ * and so neither is atomic.
353
+ */
354
+ case MO_ATOM_IFALIGN:
355
+ case MO_ATOM_NONE:
356
+ stq_le_p(p->haddr, int128_getlo(val_le));
357
+ return store_bytes_leN(p->haddr + 8, p->size - 8,
358
+ int128_gethi(val_le));
359
+
360
+ default:
361
+ g_assert_not_reached();
362
+ }
363
+}
364
+
365
static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
366
int mmu_idx, uintptr_t ra)
367
{
368
@@ -XXX,XX +XXX,XX @@ void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
369
do_st8_mmu(env, addr, val, oi, retaddr);
370
}
371
372
+static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
373
+ MemOpIdx oi, uintptr_t ra)
374
+{
375
+ MMULookupLocals l;
376
+ bool crosspage;
377
+ uint64_t a, b;
378
+ int first;
379
+
380
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
381
+ if (likely(!crosspage)) {
382
+ /* Swap to host endian if necessary, then store. */
383
+ if (l.memop & MO_BSWAP) {
384
+ val = bswap128(val);
385
+ }
386
+ if (unlikely(l.page[0].flags & TLB_MMIO)) {
387
+ QEMU_IOTHREAD_LOCK_GUARD();
388
+ if (HOST_BIG_ENDIAN) {
389
+ b = int128_getlo(val), a = int128_gethi(val);
390
+ } else {
391
+ a = int128_getlo(val), b = int128_gethi(val);
392
+ }
393
+ io_writex(env, l.page[0].full, l.mmu_idx, a, addr, ra, MO_64);
394
+ io_writex(env, l.page[0].full, l.mmu_idx, b, addr + 8, ra, MO_64);
395
+ } else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
396
+ /* nothing */
397
+ } else {
398
+ store_atom_16(env, ra, l.page[0].haddr, l.memop, val);
399
+ }
400
+ return;
401
+ }
402
+
403
+ first = l.page[0].size;
404
+ if (first == 8) {
405
+ MemOp mop8 = (l.memop & ~(MO_SIZE | MO_BSWAP)) | MO_64;
406
+
407
+ if (l.memop & MO_BSWAP) {
408
+ val = bswap128(val);
409
+ }
410
+ if (HOST_BIG_ENDIAN) {
411
+ b = int128_getlo(val), a = int128_gethi(val);
412
+ } else {
413
+ a = int128_getlo(val), b = int128_gethi(val);
414
+ }
415
+ do_st_8(env, &l.page[0], a, l.mmu_idx, mop8, ra);
416
+ do_st_8(env, &l.page[1], b, l.mmu_idx, mop8, ra);
417
+ return;
418
+ }
419
+
420
+ if ((l.memop & MO_BSWAP) != MO_LE) {
421
+ val = bswap128(val);
422
+ }
423
+ if (first < 8) {
424
+ do_st_leN(env, &l.page[0], int128_getlo(val), l.mmu_idx, l.memop, ra);
425
+ val = int128_urshift(val, first * 8);
426
+ do_st16_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
427
+ } else {
428
+ b = do_st16_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
429
+ do_st_leN(env, &l.page[1], b, l.mmu_idx, l.memop, ra);
430
+ }
431
+}
432
+
433
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
434
+ MemOpIdx oi, uintptr_t retaddr)
435
+{
436
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
437
+ do_st16_mmu(env, addr, val, oi, retaddr);
438
+}
439
+
440
+void helper_st_i128(CPUArchState *env, target_ulong addr, Int128 val,
441
+ MemOpIdx oi)
442
+{
443
+ helper_st16_mmu(env, addr, val, oi, GETPC());
444
+}
445
+
446
/*
447
* Store Helpers for cpu_ldst.h
448
*/
449
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
450
plugin_store_cb(env, addr, oi);
451
}
452
453
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
454
- MemOpIdx oi, uintptr_t ra)
455
+void cpu_st16_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
456
+ MemOpIdx oi, uintptr_t retaddr)
457
{
458
- MemOp mop = get_memop(oi);
459
- int mmu_idx = get_mmuidx(oi);
460
- MemOpIdx new_oi;
461
- unsigned a_bits;
462
-
463
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
464
- a_bits = get_alignment_bits(mop);
465
-
466
- /* Handle CPU specific unaligned behaviour */
467
- if (addr & ((1 << a_bits) - 1)) {
468
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
469
- mmu_idx, ra);
470
- }
471
-
472
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
473
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
474
- new_oi = make_memop_idx(mop, mmu_idx);
475
-
476
- helper_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
477
- helper_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
478
-
479
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
480
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
481
+ do_st16_mmu(env, addr, val, oi, retaddr);
482
+ plugin_store_cb(env, addr, oi);
483
}
484
485
-void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
486
- MemOpIdx oi, uintptr_t ra)
487
+void cpu_st16_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
488
+ MemOpIdx oi, uintptr_t retaddr)
489
{
490
- MemOp mop = get_memop(oi);
491
- int mmu_idx = get_mmuidx(oi);
492
- MemOpIdx new_oi;
493
- unsigned a_bits;
494
-
495
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
496
- a_bits = get_alignment_bits(mop);
497
-
498
- /* Handle CPU specific unaligned behaviour */
499
- if (addr & ((1 << a_bits) - 1)) {
500
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
501
- mmu_idx, ra);
502
- }
503
-
504
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
505
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
506
- new_oi = make_memop_idx(mop, mmu_idx);
507
-
508
- helper_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
509
- helper_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
510
-
511
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
512
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
513
+ do_st16_mmu(env, addr, val, oi, retaddr);
514
+ plugin_store_cb(env, addr, oi);
515
}
516
517
#include "ldst_common.c.inc"
518
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
519
index XXXXXXX..XXXXXXX 100644
520
--- a/accel/tcg/user-exec.c
521
+++ b/accel/tcg/user-exec.c
522
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
523
return cpu_to_le64(ret);
524
}
525
526
-Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
527
- MemOpIdx oi, uintptr_t ra)
528
+static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
529
+ MemOp mop, uintptr_t ra)
530
{
531
void *haddr;
532
Int128 ret;
533
534
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
535
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
536
- memcpy(&ret, haddr, 16);
537
+ tcg_debug_assert((mop & MO_SIZE) == MO_128);
538
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
539
+ ret = load_atom_16(env, ra, haddr, mop);
540
clear_helper_retaddr();
541
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
542
+ return ret;
543
+}
544
545
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
546
+ MemOpIdx oi, uintptr_t ra)
547
+{
548
+ MemOp mop = get_memop(oi);
549
+ Int128 ret = do_ld16_he_mmu(env, addr, mop, ra);
550
+
551
+ if (mop & MO_BSWAP) {
552
+ ret = bswap128(ret);
553
+ }
554
+ return ret;
555
+}
556
+
557
+Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, MemOpIdx oi)
558
+{
559
+ return helper_ld16_mmu(env, addr, oi, GETPC());
560
+}
561
+
562
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
563
+ MemOpIdx oi, uintptr_t ra)
564
+{
565
+ MemOp mop = get_memop(oi);
566
+ Int128 ret;
567
+
568
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
569
+ ret = do_ld16_he_mmu(env, addr, mop, ra);
570
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
571
if (!HOST_BIG_ENDIAN) {
572
ret = bswap128(ret);
573
}
574
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
575
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
576
MemOpIdx oi, uintptr_t ra)
577
{
578
- void *haddr;
579
+ MemOp mop = get_memop(oi);
580
Int128 ret;
581
582
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
583
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
584
- memcpy(&ret, haddr, 16);
585
- clear_helper_retaddr();
586
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
587
+ ret = do_ld16_he_mmu(env, addr, mop, ra);
588
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
589
-
590
if (HOST_BIG_ENDIAN) {
591
ret = bswap128(ret);
592
}
593
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
594
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
595
}
596
597
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
598
- Int128 val, MemOpIdx oi, uintptr_t ra)
599
+static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
600
+ MemOp mop, uintptr_t ra)
601
{
602
void *haddr;
603
604
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
605
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
606
+ tcg_debug_assert((mop & MO_SIZE) == MO_128);
607
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
608
+ store_atom_16(env, ra, haddr, mop, val);
609
+ clear_helper_retaddr();
610
+}
611
+
612
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
613
+ MemOpIdx oi, uintptr_t ra)
614
+{
615
+ MemOp mop = get_memop(oi);
616
+
617
+ if (mop & MO_BSWAP) {
618
+ val = bswap128(val);
619
+ }
620
+ do_st16_he_mmu(env, addr, val, mop, ra);
621
+}
622
+
623
+void helper_st_i128(CPUArchState *env, target_ulong addr,
624
+ Int128 val, MemOpIdx oi)
625
+{
626
+ helper_st16_mmu(env, addr, val, oi, GETPC());
627
+}
628
+
629
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
630
+ Int128 val, MemOpIdx oi, uintptr_t ra)
631
+{
632
+ MemOp mop = get_memop(oi);
633
+
634
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
635
if (!HOST_BIG_ENDIAN) {
636
val = bswap128(val);
637
}
638
- memcpy(haddr, &val, 16);
639
- clear_helper_retaddr();
640
+ do_st16_he_mmu(env, addr, val, mop, ra);
641
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
642
}
643
644
void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
645
Int128 val, MemOpIdx oi, uintptr_t ra)
646
{
647
- void *haddr;
648
+ MemOp mop = get_memop(oi);
649
650
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
651
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
652
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
653
if (HOST_BIG_ENDIAN) {
654
val = bswap128(val);
655
}
656
- memcpy(haddr, &val, 16);
657
- clear_helper_retaddr();
658
+ do_st16_he_mmu(env, addr, val, mop, ra);
659
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
660
}
661
662
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
663
index XXXXXXX..XXXXXXX 100644
664
--- a/tcg/tcg-op.c
665
+++ b/tcg/tcg-op.c
666
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
667
}
668
}
669
670
+/*
671
+ * Return true if @mop, without knowledge of the pointer alignment,
672
+ * does not require 16-byte atomicity, and it would be advantageous
673
+ * to avoid a call to a helper function.
674
+ */
675
+static bool use_two_i64_for_i128(MemOp mop)
676
+{
677
+#ifdef CONFIG_SOFTMMU
678
+ /* Two softmmu tlb lookups are larger than one function call. */
679
+ return false;
680
+#else
681
+ /*
682
+ * For user-only, two 64-bit operations may well be smaller than a call.
683
+ * Determine if that would be legal for the requested atomicity.
684
+ */
685
+ switch (mop & MO_ATOM_MASK) {
686
+ case MO_ATOM_NONE:
687
+ case MO_ATOM_IFALIGN_PAIR:
688
+ return true;
689
+ case MO_ATOM_IFALIGN:
690
+ case MO_ATOM_SUBALIGN:
691
+ case MO_ATOM_WITHIN16:
692
+ case MO_ATOM_WITHIN16_PAIR:
693
+ /* In a serialized context, no atomicity is required. */
694
+ return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
695
+ default:
696
+ g_assert_not_reached();
697
+ }
698
+#endif
699
+}
700
+
701
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
702
{
703
MemOp mop_1 = orig, mop_2;
704
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
705
ret[1] = mop_2;
706
}
707
708
+#if TARGET_LONG_BITS == 64
709
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
710
+#else
711
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
712
+#endif
713
+
714
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
715
{
716
- MemOp mop[2];
717
- TCGv addr_p8;
718
- TCGv_i64 x, y;
719
+ MemOpIdx oi = make_memop_idx(memop, idx);
720
721
- canonicalize_memop_i128_as_i64(mop, memop);
722
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
723
+ tcg_debug_assert((memop & MO_SIGN) == 0);
724
725
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
726
addr = plugin_prep_mem_callbacks(addr);
727
728
- /* TODO: respect atomicity of the operation. */
729
/* TODO: allow the tcg backend to see the whole operation. */
730
731
- /*
732
- * Since there are no global TCGv_i128, there is no visible state
733
- * changed if the second load faults. Load directly into the two
734
- * subwords.
735
- */
736
- if ((memop & MO_BSWAP) == MO_LE) {
737
- x = TCGV128_LOW(val);
738
- y = TCGV128_HIGH(val);
739
+ if (use_two_i64_for_i128(memop)) {
740
+ MemOp mop[2];
741
+ TCGv addr_p8;
742
+ TCGv_i64 x, y;
743
+
744
+ canonicalize_memop_i128_as_i64(mop, memop);
745
+
746
+ /*
747
+ * Since there are no global TCGv_i128, there is no visible state
748
+ * changed if the second load faults. Load directly into the two
749
+ * subwords.
750
+ */
751
+ if ((memop & MO_BSWAP) == MO_LE) {
752
+ x = TCGV128_LOW(val);
753
+ y = TCGV128_HIGH(val);
754
+ } else {
755
+ x = TCGV128_HIGH(val);
756
+ y = TCGV128_LOW(val);
757
+ }
758
+
759
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
760
+
761
+ if ((mop[0] ^ memop) & MO_BSWAP) {
762
+ tcg_gen_bswap64_i64(x, x);
763
+ }
764
+
765
+ addr_p8 = tcg_temp_ebb_new();
766
+ tcg_gen_addi_tl(addr_p8, addr, 8);
767
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
768
+ tcg_temp_free(addr_p8);
769
+
770
+ if ((mop[0] ^ memop) & MO_BSWAP) {
771
+ tcg_gen_bswap64_i64(y, y);
772
+ }
773
} else {
774
- x = TCGV128_HIGH(val);
775
- y = TCGV128_LOW(val);
776
+ gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
777
}
778
779
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
780
-
781
- if ((mop[0] ^ memop) & MO_BSWAP) {
782
- tcg_gen_bswap64_i64(x, x);
783
- }
784
-
785
- addr_p8 = tcg_temp_new();
786
- tcg_gen_addi_tl(addr_p8, addr, 8);
787
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
788
- tcg_temp_free(addr_p8);
789
-
790
- if ((mop[0] ^ memop) & MO_BSWAP) {
791
- tcg_gen_bswap64_i64(y, y);
792
- }
793
-
794
- plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
795
- QEMU_PLUGIN_MEM_R);
796
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
797
}
798
799
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
800
{
801
- MemOp mop[2];
802
- TCGv addr_p8;
803
- TCGv_i64 x, y;
804
+ MemOpIdx oi = make_memop_idx(memop, idx);
805
806
- canonicalize_memop_i128_as_i64(mop, memop);
807
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
808
+ tcg_debug_assert((memop & MO_SIGN) == 0);
809
810
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
811
addr = plugin_prep_mem_callbacks(addr);
812
813
- /* TODO: respect atomicity of the operation. */
814
/* TODO: allow the tcg backend to see the whole operation. */
815
816
- if ((memop & MO_BSWAP) == MO_LE) {
817
- x = TCGV128_LOW(val);
818
- y = TCGV128_HIGH(val);
819
+ if (use_two_i64_for_i128(memop)) {
820
+ MemOp mop[2];
821
+ TCGv addr_p8;
822
+ TCGv_i64 x, y;
823
+
824
+ canonicalize_memop_i128_as_i64(mop, memop);
825
+
826
+ if ((memop & MO_BSWAP) == MO_LE) {
827
+ x = TCGV128_LOW(val);
828
+ y = TCGV128_HIGH(val);
829
+ } else {
830
+ x = TCGV128_HIGH(val);
831
+ y = TCGV128_LOW(val);
832
+ }
833
+
834
+ addr_p8 = tcg_temp_ebb_new();
835
+ if ((mop[0] ^ memop) & MO_BSWAP) {
836
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
837
+
838
+ tcg_gen_bswap64_i64(t, x);
839
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
840
+ tcg_gen_bswap64_i64(t, y);
841
+ tcg_gen_addi_tl(addr_p8, addr, 8);
842
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
843
+ tcg_temp_free_i64(t);
844
+ } else {
845
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
846
+ tcg_gen_addi_tl(addr_p8, addr, 8);
847
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
848
+ }
849
+ tcg_temp_free(addr_p8);
850
} else {
851
- x = TCGV128_HIGH(val);
852
- y = TCGV128_LOW(val);
853
+ gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
854
}
855
856
- addr_p8 = tcg_temp_new();
857
- if ((mop[0] ^ memop) & MO_BSWAP) {
858
- TCGv_i64 t = tcg_temp_ebb_new_i64();
859
-
860
- tcg_gen_bswap64_i64(t, x);
861
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
862
- tcg_gen_bswap64_i64(t, y);
863
- tcg_gen_addi_tl(addr_p8, addr, 8);
864
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
865
- tcg_temp_free_i64(t);
866
- } else {
867
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
868
- tcg_gen_addi_tl(addr_p8, addr, 8);
869
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
870
- }
871
- tcg_temp_free(addr_p8);
872
-
873
- plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
874
- QEMU_PLUGIN_MEM_W);
875
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
876
}
877
878
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
879
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
880
index XXXXXXX..XXXXXXX 100644
881
--- a/accel/tcg/ldst_atomicity.c.inc
882
+++ b/accel/tcg/ldst_atomicity.c.inc
883
@@ -XXX,XX +XXX,XX @@ static inline uint64_t load_atom_8_by_4(void *pv)
884
}
885
}
886
887
+/**
888
+ * load_atom_8_by_8_or_4:
889
+ * @pv: host address
890
+ *
891
+ * Load 8 bytes from aligned @pv, with at least 4-byte atomicity.
892
+ */
893
+static inline uint64_t load_atom_8_by_8_or_4(void *pv)
894
+{
895
+ if (HAVE_al8_fast) {
896
+ return load_atomic8(pv);
897
+ } else {
898
+ return load_atom_8_by_4(pv);
899
+ }
900
+}
901
+
902
/**
903
* load_atom_2:
904
* @p: host address
905
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
906
}
907
}
908
909
+/**
910
+ * load_atom_16:
911
+ * @p: host address
912
+ * @memop: the full memory op
913
+ *
914
+ * Load 16 bytes from @p, honoring the atomicity of @memop.
915
+ */
916
+static Int128 load_atom_16(CPUArchState *env, uintptr_t ra,
917
+ void *pv, MemOp memop)
918
+{
919
+ uintptr_t pi = (uintptr_t)pv;
920
+ int atmax;
921
+ Int128 r;
922
+ uint64_t a, b;
923
+
924
+ /*
925
+ * If the host does not support 16-byte atomics, wait until we have
926
+ * examined the atomicity parameters below.
927
+ */
928
+ if (HAVE_al16_fast && likely((pi & 15) == 0)) {
929
+ return load_atomic16(pv);
930
+ }
931
+
932
+ atmax = required_atomicity(env, pi, memop);
933
+ switch (atmax) {
934
+ case MO_8:
935
+ memcpy(&r, pv, 16);
936
+ return r;
937
+ case MO_16:
938
+ a = load_atom_8_by_2(pv);
939
+ b = load_atom_8_by_2(pv + 8);
940
+ break;
941
+ case MO_32:
942
+ a = load_atom_8_by_4(pv);
943
+ b = load_atom_8_by_4(pv + 8);
944
+ break;
945
+ case MO_64:
946
+ if (!HAVE_al8) {
947
+ cpu_loop_exit_atomic(env_cpu(env), ra);
948
+ }
949
+ a = load_atomic8(pv);
950
+ b = load_atomic8(pv + 8);
951
+ break;
952
+ case -MO_64:
953
+ if (!HAVE_al8) {
954
+ cpu_loop_exit_atomic(env_cpu(env), ra);
955
+ }
956
+ a = load_atom_extract_al8x2(pv);
957
+ b = load_atom_extract_al8x2(pv + 8);
958
+ break;
959
+ case MO_128:
960
+ return load_atomic16_or_exit(env, ra, pv);
961
+ default:
962
+ g_assert_not_reached();
963
+ }
964
+ return int128_make128(HOST_BIG_ENDIAN ? b : a, HOST_BIG_ENDIAN ? a : b);
965
+}
966
+
967
/**
968
* store_atomic2:
969
* @pv: host address
970
@@ -XXX,XX +XXX,XX @@ static inline void store_atomic8(void *pv, uint64_t val)
971
qatomic_set__nocheck(p, val);
972
}
973
974
+/**
975
+ * store_atomic16:
976
+ * @pv: host address
977
+ * @val: value to store
978
+ *
979
+ * Atomically store 16 aligned bytes to @pv.
980
+ */
981
+static inline void store_atomic16(void *pv, Int128Alias val)
982
+{
983
+#if defined(CONFIG_ATOMIC128)
984
+ __uint128_t *pu = __builtin_assume_aligned(pv, 16);
985
+ qatomic_set__nocheck(pu, val.u);
986
+#elif defined(CONFIG_CMPXCHG128)
987
+ __uint128_t *pu = __builtin_assume_aligned(pv, 16);
988
+ __uint128_t o;
989
+
990
+ /*
991
+ * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
992
+ * defer to libatomic, so we must use __sync_*_compare_and_swap_16
993
+ * and accept the sequential consistency that comes with it.
994
+ */
995
+ do {
996
+ o = *pu;
997
+ } while (!__sync_bool_compare_and_swap_16(pu, o, val.u));
998
+#else
999
+ qemu_build_not_reached();
1000
+#endif
1001
+}
1002
+
1003
/**
1004
* store_atom_4x2
1005
*/
1006
@@ -XXX,XX +XXX,XX @@ static void store_atom_8(CPUArchState *env, uintptr_t ra,
1007
}
1008
cpu_loop_exit_atomic(env_cpu(env), ra);
1009
}
1010
+
1011
+/**
1012
+ * store_atom_16:
1013
+ * @p: host address
1014
+ * @val: the value to store
1015
+ * @memop: the full memory op
1016
+ *
1017
+ * Store 16 bytes to @p, honoring the atomicity of @memop.
1018
+ */
1019
+static void store_atom_16(CPUArchState *env, uintptr_t ra,
1020
+ void *pv, MemOp memop, Int128 val)
1021
+{
1022
+ uintptr_t pi = (uintptr_t)pv;
1023
+ uint64_t a, b;
1024
+ int atmax;
1025
+
1026
+ if (HAVE_al16_fast && likely((pi & 15) == 0)) {
1027
+ store_atomic16(pv, val);
1028
+ return;
1029
+ }
1030
+
1031
+ atmax = required_atomicity(env, pi, memop);
1032
+
1033
+ a = HOST_BIG_ENDIAN ? int128_gethi(val) : int128_getlo(val);
1034
+ b = HOST_BIG_ENDIAN ? int128_getlo(val) : int128_gethi(val);
1035
+ switch (atmax) {
1036
+ case MO_8:
1037
+ memcpy(pv, &val, 16);
1038
+ return;
1039
+ case MO_16:
1040
+ store_atom_8_by_2(pv, a);
1041
+ store_atom_8_by_2(pv + 8, b);
1042
+ return;
1043
+ case MO_32:
1044
+ store_atom_8_by_4(pv, a);
1045
+ store_atom_8_by_4(pv + 8, b);
1046
+ return;
1047
+ case MO_64:
1048
+ if (HAVE_al8) {
1049
+ store_atomic8(pv, a);
1050
+ store_atomic8(pv + 8, b);
1051
+ return;
1052
+ }
1053
+ break;
1054
+ case -MO_64:
1055
+ if (HAVE_al16) {
1056
+ uint64_t val_le;
1057
+ int s2 = pi & 15;
1058
+ int s1 = 16 - s2;
1059
+
1060
+ if (HOST_BIG_ENDIAN) {
1061
+ val = bswap128(val);
1062
+ }
1063
+ switch (s2) {
1064
+ case 1 ... 7:
1065
+ val_le = store_whole_le16(pv, s1, val);
1066
+ store_bytes_leN(pv + s1, s2, val_le);
1067
+ break;
1068
+ case 9 ... 15:
1069
+ store_bytes_leN(pv, s1, int128_getlo(val));
1070
+ val = int128_urshift(val, s1 * 8);
1071
+ store_whole_le16(pv + s1, s2, val);
1072
+ break;
1073
+ case 0: /* aligned */
1074
+ case 8: /* atmax MO_64 */
1075
+ default:
1076
+ g_assert_not_reached();
1077
+ }
1078
+ return;
1079
+ }
1080
+ break;
1081
+ case MO_128:
1082
+ if (HAVE_al16) {
1083
+ store_atomic16(pv, val);
1084
+ return;
1085
+ }
1086
+ break;
1087
+ default:
1088
+ g_assert_not_reached();
1089
+ }
1090
+ cpu_loop_exit_atomic(env_cpu(env), ra);
1091
+}
1092
--
1093
2.34.1
New patch
1
There is an edge case prior to GCC 13 for which optimization
2
is required to generate 16-byte atomic sequences. Detect this.
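
As a stand-alone illustration of the edge case being probed (the function
names below are invented for the example and are not part of the patch),
an affected GCC built at -O0 may emit the first function as a call into
libatomic, while the per-function optimize attribute lets the 16-byte
access be inlined:

    /* Minimal sketch: is a 16-byte __atomic load inlined or a libatomic call? */
    typedef unsigned __int128 u128;

    /* Prior to GCC 13, without optimization this may call __atomic_load_16. */
    u128 ld16_plain(void *pv)
    {
        u128 *p = __builtin_assume_aligned(pv, 16);
        return __atomic_load_n(p, __ATOMIC_RELAXED);
    }

    /* Enabling optimization for just this function recovers the inline sequence. */
    __attribute__((optimize("O1")))
    u128 ld16_opt(void *pv)
    {
        u128 *p = __builtin_assume_aligned(pv, 16);
        return __atomic_load_n(p, __ATOMIC_RELAXED);
    }
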
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
meson.build | 52 ++++++++++++++++++++++------------
8
accel/tcg/ldst_atomicity.c.inc | 29 ++++++++++++++++---
9
2 files changed, 59 insertions(+), 22 deletions(-)
10
11
diff --git a/meson.build b/meson.build
12
index XXXXXXX..XXXXXXX 100644
13
--- a/meson.build
14
+++ b/meson.build
15
@@ -XXX,XX +XXX,XX @@ config_host_data.set('HAVE_BROKEN_SIZE_MAX', not cc.compiles('''
16
return printf("%zu", SIZE_MAX);
17
}''', args: ['-Werror']))
18
19
-atomic_test = '''
20
+# See if 64-bit atomic operations are supported.
21
+# Note that without __atomic builtins, we can only
22
+# assume atomic loads/stores max at pointer size.
23
+config_host_data.set('CONFIG_ATOMIC64', cc.links('''
24
#include <stdint.h>
25
int main(void)
26
{
27
- @0@ x = 0, y = 0;
28
+ uint64_t x = 0, y = 0;
29
y = __atomic_load_n(&x, __ATOMIC_RELAXED);
30
__atomic_store_n(&x, y, __ATOMIC_RELAXED);
31
__atomic_compare_exchange_n(&x, &y, x, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
32
__atomic_exchange_n(&x, y, __ATOMIC_RELAXED);
33
__atomic_fetch_add(&x, y, __ATOMIC_RELAXED);
34
return 0;
35
- }'''
36
-
37
-# See if 64-bit atomic operations are supported.
38
-# Note that without __atomic builtins, we can only
39
-# assume atomic loads/stores max at pointer size.
40
-config_host_data.set('CONFIG_ATOMIC64', cc.links(atomic_test.format('uint64_t')))
41
+ }'''))
42
43
has_int128 = cc.links('''
44
__int128_t a;
45
@@ -XXX,XX +XXX,XX @@ if has_int128
46
# "do we have 128-bit atomics which are handled inline and specifically not
47
# via libatomic". The reason we can't use libatomic is documented in the
48
# comment starting "GCC is a house divided" in include/qemu/atomic128.h.
49
- has_atomic128 = cc.links(atomic_test.format('unsigned __int128'))
50
+ # We only care about these operations on 16-byte aligned pointers, so
51
+ # force 16-byte alignment of the pointer, which may be greater than
52
+ # __alignof(unsigned __int128) for the host.
53
+ atomic_test_128 = '''
54
+ int main(int ac, char **av) {
55
+ unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16);
56
+ p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
57
+ __atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
58
+ __atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
59
+ return 0;
60
+ }'''
61
+ has_atomic128 = cc.links(atomic_test_128)
62
63
config_host_data.set('CONFIG_ATOMIC128', has_atomic128)
64
65
if not has_atomic128
66
- has_cmpxchg128 = cc.links('''
67
- int main(void)
68
- {
69
- unsigned __int128 x = 0, y = 0;
70
- __sync_val_compare_and_swap_16(&x, y, x);
71
- return 0;
72
- }
73
- ''')
74
+ # Even with __builtin_assume_aligned, the above test may have failed
75
+ # without optimization enabled. Try again with optimizations locally
76
+ # enabled for the function. See
77
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
78
+ has_atomic128_opt = cc.links('__attribute__((optimize("O1")))' + atomic_test_128)
79
+ config_host_data.set('CONFIG_ATOMIC128_OPT', has_atomic128_opt)
80
81
- config_host_data.set('CONFIG_CMPXCHG128', has_cmpxchg128)
82
+ if not has_atomic128_opt
83
+ config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
84
+ int main(void)
85
+ {
86
+ unsigned __int128 x = 0, y = 0;
87
+ __sync_val_compare_and_swap_16(&x, y, x);
88
+ return 0;
89
+ }
90
+ '''))
91
+ endif
92
endif
93
endif
94
95
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
96
index XXXXXXX..XXXXXXX 100644
97
--- a/accel/tcg/ldst_atomicity.c.inc
98
+++ b/accel/tcg/ldst_atomicity.c.inc
99
@@ -XXX,XX +XXX,XX @@
100
#endif
101
#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
102
103
+/*
104
+ * If __alignof(unsigned __int128) < 16, GCC may refuse to inline atomics
105
+ * that are supported by the host, e.g. s390x. We can force the pointer to
106
+ * have our known alignment with __builtin_assume_aligned, however prior to
107
+ * GCC 13 that was only reliable with optimization enabled. See
108
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
109
+ */
110
+#if defined(CONFIG_ATOMIC128_OPT)
111
+# if !defined(__OPTIMIZE__)
112
+# define ATTRIBUTE_ATOMIC128_OPT __attribute__((optimize("O1")))
113
+# endif
114
+# define CONFIG_ATOMIC128
115
+#endif
116
+#ifndef ATTRIBUTE_ATOMIC128_OPT
117
+# define ATTRIBUTE_ATOMIC128_OPT
118
+#endif
119
+
120
#if defined(CONFIG_ATOMIC128)
121
# define HAVE_al16_fast true
122
#else
123
@@ -XXX,XX +XXX,XX @@ static inline uint64_t load_atomic8(void *pv)
124
*
125
* Atomically load 16 aligned bytes from @pv.
126
*/
127
-static inline Int128 load_atomic16(void *pv)
128
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
129
+load_atomic16(void *pv)
130
{
131
#ifdef CONFIG_ATOMIC128
132
__uint128_t *p = __builtin_assume_aligned(pv, 16);
133
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
134
* cross an 16-byte boundary then the access must be 16-byte atomic,
135
* otherwise the access must be 8-byte atomic.
136
*/
137
-static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
138
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
139
+load_atom_extract_al16_or_al8(void *pv, int s)
140
{
141
#if defined(CONFIG_ATOMIC128)
142
uintptr_t pi = (uintptr_t)pv;
143
@@ -XXX,XX +XXX,XX @@ static inline void store_atomic8(void *pv, uint64_t val)
144
*
145
* Atomically store 16 aligned bytes to @pv.
146
*/
147
-static inline void store_atomic16(void *pv, Int128Alias val)
148
+static inline void ATTRIBUTE_ATOMIC128_OPT
149
+store_atomic16(void *pv, Int128Alias val)
150
{
151
#if defined(CONFIG_ATOMIC128)
152
__uint128_t *pu = __builtin_assume_aligned(pv, 16);
153
@@ -XXX,XX +XXX,XX @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
154
*
155
* Atomically store @val to @p masked by @msk.
156
*/
157
-static void store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
158
+static void ATTRIBUTE_ATOMIC128_OPT
159
+store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
160
{
161
#if defined(CONFIG_ATOMIC128)
162
__uint128_t *pu, old, new;
163
--
164
2.34.1
diff view generated by jsdifflib
New patch
1
Notice when Intel or AMD have guaranteed that vmovdqa is atomic.
2
The new variable will also be used in generated code.
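
A rough stand-alone sketch of the detection logic follows (the function
name is invented for the example; the real code sits in tcg_target_init
and reuses the have_avx1 result already computed there):

    /* Sketch: 16-byte atomicity of aligned vector loads/stores on x86. */
    #include <cpuid.h>
    #include <stdbool.h>

    static bool detect_atomic16(bool have_avx1)
    {
        unsigned a, b, c, d;

        if (!have_avx1 || __get_cpuid_max(0, 0) == 0) {
            return false;
        }
        /* Leaf 0 returns the vendor string across EBX:EDX:ECX. */
        __cpuid(0, a, b, c, d);
        return c == 0x6c65746e      /* "ntel" of GenuineIntel */
            || c == 0x444d4163;     /* "cAMD" of AuthenticAMD */
    }
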
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/qemu/cpuid.h | 18 ++++++++++++++++++
8
tcg/i386/tcg-target.h | 1 +
9
tcg/i386/tcg-target.c.inc | 27 +++++++++++++++++++++++++++
10
3 files changed, 46 insertions(+)
11
12
diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/qemu/cpuid.h
15
+++ b/include/qemu/cpuid.h
16
@@ -XXX,XX +XXX,XX @@
17
#define bit_LZCNT (1 << 5)
18
#endif
19
20
+/*
21
+ * Signatures for different CPU implementations as returned from Leaf 0.
22
+ */
23
+
24
+#ifndef signature_INTEL_ecx
25
+/* "Genu" "ineI" "ntel" */
26
+#define signature_INTEL_ebx 0x756e6547
27
+#define signature_INTEL_edx 0x49656e69
28
+#define signature_INTEL_ecx 0x6c65746e
29
+#endif
30
+
31
+#ifndef signature_AMD_ecx
32
+/* "Auth" "enti" "cAMD" */
33
+#define signature_AMD_ebx 0x68747541
34
+#define signature_AMD_edx 0x69746e65
35
+#define signature_AMD_ecx 0x444d4163
36
+#endif
37
+
38
static inline unsigned xgetbv_low(unsigned c)
39
{
40
unsigned a, d;
41
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/tcg/i386/tcg-target.h
44
+++ b/tcg/i386/tcg-target.h
45
@@ -XXX,XX +XXX,XX @@ extern bool have_avx512dq;
46
extern bool have_avx512vbmi2;
47
extern bool have_avx512vl;
48
extern bool have_movbe;
49
+extern bool have_atomic16;
50
51
/* optional instructions */
52
#define TCG_TARGET_HAS_div2_i32 1
53
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
54
index XXXXXXX..XXXXXXX 100644
55
--- a/tcg/i386/tcg-target.c.inc
56
+++ b/tcg/i386/tcg-target.c.inc
57
@@ -XXX,XX +XXX,XX @@ bool have_avx512dq;
58
bool have_avx512vbmi2;
59
bool have_avx512vl;
60
bool have_movbe;
61
+bool have_atomic16;
62
63
#ifdef CONFIG_CPUID_H
64
static bool have_bmi2;
65
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
66
have_avx512dq = (b7 & bit_AVX512DQ) != 0;
67
have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
68
}
69
+
70
+ /*
71
+ * The Intel SDM has added:
72
+ * Processors that enumerate support for Intel® AVX
73
+ * (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
74
+ * guarantee that the 16-byte memory operations performed
75
+ * by the following instructions will always be carried
76
+ * out atomically:
77
+ * - MOVAPD, MOVAPS, and MOVDQA.
78
+ * - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
79
+ * - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
80
+ * with EVEX.128 and k0 (masking disabled).
81
+ * Note that these instructions require the linear addresses
82
+ * of their memory operands to be 16-byte aligned.
83
+ *
84
+ * AMD has provided an even stronger guarantee that processors
85
+ * with AVX provide 16-byte atomicity for all cachable,
86
+ * naturally aligned single loads and stores, e.g. MOVDQU.
87
+ *
88
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
89
+ */
90
+ if (have_avx1) {
91
+ __cpuid(0, a, b, c, d);
92
+ have_atomic16 = (c == signature_INTEL_ecx ||
93
+ c == signature_AMD_ecx);
94
+ }
95
}
96
}
97
}
98
--
99
2.34.1
100
101
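
The detection above can be reproduced in a standalone program using GCC's
<cpuid.h>; this is an illustrative sketch, not QEMU code, and
demo_have_atomic16 is a made-up name:

    #include <cpuid.h>
    #include <stdbool.h>

    static bool demo_have_atomic16(void)
    {
        unsigned a, b, c, d;

        /* AVX alone is not enough; the 16-byte guarantee is vendor-documented. */
        if (!__get_cpuid(1, &a, &b, &c, &d) || !(c & bit_AVX)) {
            return false;
        }
        __cpuid(0, a, b, c, d);
        return c == 0x6c65746e /* "ntel" */ || c == 0x444d4163 /* "cAMD" */;
    }
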
New patch
1
Notice when the host has additional atomic instructions.
2
The new variables will also be used in generated code.
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target.h | 3 +++
9
tcg/aarch64/tcg-target.c.inc | 12 ++++++++++++
10
2 files changed, 15 insertions(+)
11
12
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/aarch64/tcg-target.h
15
+++ b/tcg/aarch64/tcg-target.h
16
@@ -XXX,XX +XXX,XX @@ typedef enum {
17
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
18
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
19
20
+extern bool have_lse;
21
+extern bool have_lse2;
22
+
23
/* optional instructions */
24
#define TCG_TARGET_HAS_div_i32 1
25
#define TCG_TARGET_HAS_rem_i32 1
26
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
27
index XXXXXXX..XXXXXXX 100644
28
--- a/tcg/aarch64/tcg-target.c.inc
29
+++ b/tcg/aarch64/tcg-target.c.inc
30
@@ -XXX,XX +XXX,XX @@
31
#include "../tcg-ldst.c.inc"
32
#include "../tcg-pool.c.inc"
33
#include "qemu/bitops.h"
34
+#ifdef __linux__
35
+#include <asm/hwcap.h>
36
+#endif
37
38
/* We're going to re-use TCGType in setting of the SF bit, which controls
39
the size of the operation performed. If we know the values match, it
40
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
41
return TCG_REG_X0 + slot;
42
}
43
44
+bool have_lse;
45
+bool have_lse2;
46
+
47
#define TCG_REG_TMP TCG_REG_X30
48
#define TCG_VEC_TMP TCG_REG_V31
49
50
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
51
52
static void tcg_target_init(TCGContext *s)
53
{
54
+#ifdef __linux__
55
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
56
+ have_lse = hwcap & HWCAP_ATOMICS;
57
+ have_lse2 = hwcap & HWCAP_USCAT;
58
+#endif
59
+
60
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
61
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
62
tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
63
--
64
2.34.1
65
66
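
The same hwcap bits can be inspected from ordinary Linux user space; a
small sketch, assuming an aarch64 glibc host (QEMU itself goes through
qemu_getauxval):

    #include <stdio.h>
    #include <sys/auxv.h>
    #include <asm/hwcap.h>

    int main(void)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);

        printf("FEAT_LSE:  %d\n", !!(hwcap & HWCAP_ATOMICS));
        printf("FEAT_LSE2: %d\n", !!(hwcap & HWCAP_USCAT));
        return 0;
    }
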
New patch
1
These features are present on Apple M1.
1
2
3
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target.c.inc | 28 ++++++++++++++++++++++++++++
9
1 file changed, 28 insertions(+)
10
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@
16
#ifdef __linux__
17
#include <asm/hwcap.h>
18
#endif
19
+#ifdef CONFIG_DARWIN
20
+#include <sys/sysctl.h>
21
+#endif
22
23
/* We're going to re-use TCGType in setting of the SF bit, which controls
24
the size of the operation performed. If we know the values match, it
25
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
26
}
27
}
28
29
+#ifdef CONFIG_DARWIN
30
+static bool sysctl_for_bool(const char *name)
31
+{
32
+ int val = 0;
33
+ size_t len = sizeof(val);
34
+
35
+ if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
36
+ return val != 0;
37
+ }
38
+
39
+ /*
40
+ * We might in the future ask for properties not present in older kernels,
41
+ * but we're only asking about static properties, all of which should be
42
+ * 'int'. So we shouldn't see ENOMEM (val too small), or any of the other
43
+ * more exotic errors.
44
+ */
45
+ assert(errno == ENOENT);
46
+ return false;
47
+}
48
+#endif
49
+
50
static void tcg_target_init(TCGContext *s)
51
{
52
#ifdef __linux__
53
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
54
have_lse = hwcap & HWCAP_ATOMICS;
55
have_lse2 = hwcap & HWCAP_USCAT;
56
#endif
57
+#ifdef CONFIG_DARWIN
58
+ have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE");
59
+ have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2");
60
+#endif
61
62
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
63
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
64
--
65
2.34.1
66
67
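
For reference, the same sysctl keys can be queried from a trivial macOS
program; a usage sketch of sysctlbyname() only, not part of the patch:

    #include <stdio.h>
    #include <sys/sysctl.h>

    int main(void)
    {
        static const char *const keys[] = {
            "hw.optional.arm.FEAT_LSE",
            "hw.optional.arm.FEAT_LSE2",
        };

        for (int i = 0; i < 2; i++) {
            int val = 0;
            size_t len = sizeof(val);

            if (sysctlbyname(keys[i], &val, &len, NULL, 0) == 0) {
                printf("%s: %d\n", keys[i], val);
            }
        }
        return 0;
    }
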
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 52 +++------------------------------------
9
1 file changed, 4 insertions(+), 48 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
int seg;
17
} HostAddress;
18
19
-#if defined(CONFIG_SOFTMMU)
20
/*
21
* Because i686 has no register parameters and because x86_64 has xchg
22
* to handle addr/data register overlap, we have placed all input arguments
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
25
/* resolve label address */
26
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
27
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
28
+ if (label_ptr[1]) {
29
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
30
}
31
32
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
33
34
/* resolve label address */
35
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
36
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
37
+ if (label_ptr[1]) {
38
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
39
}
40
41
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
42
tcg_out_jmp(s, l->raddr);
43
return true;
44
}
45
-#else
46
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
47
-{
48
- /* resolve label address */
49
- tcg_patch32(l->label_ptr[0], s->code_ptr - l->label_ptr[0] - 4);
50
-
51
- if (TCG_TARGET_REG_BITS == 32) {
52
- int ofs = 0;
53
-
54
- tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
55
- ofs += 4;
56
-
57
- tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
58
- ofs += 4;
59
- if (TARGET_LONG_BITS == 64) {
60
- tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
61
- ofs += 4;
62
- }
63
-
64
- tcg_out_pushi(s, (uintptr_t)l->raddr);
65
- } else {
66
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
67
- l->addrlo_reg);
68
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
69
-
70
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, (uintptr_t)l->raddr);
71
- tcg_out_push(s, TCG_REG_RAX);
72
- }
73
-
74
- /* "Tail call" to the helper, with the return address back inline. */
75
- tcg_out_jmp(s, (const void *)(l->is_ld ? helper_unaligned_ld
76
- : helper_unaligned_st));
77
- return true;
78
-}
79
-
80
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
81
-{
82
- return tcg_out_fail_alignment(s, l);
83
-}
84
-
85
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
86
-{
87
- return tcg_out_fail_alignment(s, l);
88
-}
89
90
+#ifndef CONFIG_SOFTMMU
91
static HostAddress x86_guest_base = {
92
.index = -1
93
};
94
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
95
return 0;
96
}
97
#endif /* setup_guest_base_seg */
98
-#endif /* SOFTMMU */
99
+#endif /* !SOFTMMU */
100
101
/*
102
* For softmmu, perform the TLB load and compare.
103
--
104
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target.c.inc | 35 -----------------------------------
9
1 file changed, 35 deletions(-)
10
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
TCGType index_ext;
17
} HostAddress;
18
19
-#ifdef CONFIG_SOFTMMU
20
static const TCGLdstHelperParam ldst_helper_param = {
21
.ntmp = 1, .tmp = { TCG_REG_TMP }
22
};
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tcg_out_goto(s, lb->raddr);
25
return true;
26
}
27
-#else
28
-static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
29
-{
30
- ptrdiff_t offset = tcg_pcrel_diff(s, target);
31
- tcg_debug_assert(offset == sextract64(offset, 0, 21));
32
- tcg_out_insn(s, 3406, ADR, rd, offset);
33
-}
34
-
35
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
36
-{
37
- if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
38
- return false;
39
- }
40
-
41
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
42
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
43
-
44
- /* "Tail call" to the helper, with the return address back inline. */
45
- tcg_out_adr(s, TCG_REG_LR, l->raddr);
46
- tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
47
- : helper_unaligned_st));
48
- return true;
49
-}
50
-
51
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
52
-{
53
- return tcg_out_fail_alignment(s, l);
54
-}
55
-
56
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
57
-{
58
- return tcg_out_fail_alignment(s, l);
59
-}
60
-#endif /* CONFIG_SOFTMMU */
61
62
/*
63
* For softmmu, perform the TLB load and compare.
64
--
65
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/ppc/tcg-target.c.inc | 44 ----------------------------------------
9
1 file changed, 44 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.c.inc
14
+++ b/tcg/ppc/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
16
[MO_BSWAP | MO_UQ] = STDBRX,
17
};
18
19
-#if defined (CONFIG_SOFTMMU)
20
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
21
{
22
if (arg < 0) {
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tcg_out_b(s, 0, lb->raddr);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
31
- return false;
32
- }
33
-
34
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
35
- TCGReg arg = TCG_REG_R4;
36
-
37
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
38
- if (l->addrlo_reg != arg) {
39
- tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
40
- tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
41
- } else if (l->addrhi_reg != arg + 1) {
42
- tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
43
- tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
44
- } else {
45
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg);
46
- tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1);
47
- tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0);
48
- }
49
- } else {
50
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg);
51
- }
52
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0);
53
-
54
- /* "Tail call" to the helper, with the return address back inline. */
55
- tcg_out_call_int(s, 0, (const void *)(l->is_ld ? helper_unaligned_ld
56
- : helper_unaligned_st));
57
- return true;
58
-}
59
-
60
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
61
-{
62
- return tcg_out_fail_alignment(s, l);
63
-}
64
-
65
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
66
-{
67
- return tcg_out_fail_alignment(s, l);
68
-}
69
-#endif /* SOFTMMU */
70
71
typedef struct {
72
TCGReg base;
73
--
74
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/loongarch64/tcg-target.c.inc | 30 ------------------------------
9
1 file changed, 30 deletions(-)
10
11
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/loongarch64/tcg-target.c.inc
14
+++ b/tcg/loongarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
16
* Load/store helpers for SoftMMU, and qemu_ld/st implementations
17
*/
18
19
-#if defined(CONFIG_SOFTMMU)
20
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
21
{
22
tcg_out_opc_b(s, 0);
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
25
return tcg_out_goto(s, l->raddr);
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- /* resolve label address */
31
- if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
32
- return false;
33
- }
34
-
35
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
37
-
38
- /* tail call, with the return address back inline. */
39
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr);
40
- tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld
41
- : helper_unaligned_st), true);
42
- return true;
43
-}
44
-
45
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
-{
47
- return tcg_out_fail_alignment(s, l);
48
-}
49
-
50
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
-{
52
- return tcg_out_fail_alignment(s, l);
53
-}
54
-
55
-#endif /* CONFIG_SOFTMMU */
56
57
typedef struct {
58
TCGReg base;
59
--
60
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/riscv/tcg-target.c.inc | 29 -----------------------------
9
1 file changed, 29 deletions(-)
10
11
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/riscv/tcg-target.c.inc
14
+++ b/tcg/riscv/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
16
* Load/store and TLB
17
*/
18
19
-#if defined(CONFIG_SOFTMMU)
20
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
21
{
22
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
tcg_out_goto(s, l->raddr);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- /* resolve label address */
31
- if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
32
- return false;
33
- }
34
-
35
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
37
-
38
- /* tail call, with the return address back inline. */
39
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr);
40
- tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld
41
- : helper_unaligned_st), true);
42
- return true;
43
-}
44
-
45
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
-{
47
- return tcg_out_fail_alignment(s, l);
48
-}
49
-
50
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
-{
52
- return tcg_out_fail_alignment(s, l);
53
-}
54
-#endif /* CONFIG_SOFTMMU */
55
56
/*
57
* For softmmu, perform the TLB load and compare.
58
--
59
2.34.1
New patch
1
Always reserve r3 for tlb softmmu lookup. Fix a bug in user-only
2
ALL_QLDST_REGS, in that r14 is clobbered by the BLNE that leads
3
to the misaligned trap. Remove r0+r1 from user-only ALL_QLDST_REGS;
4
I believe these had been reserved for bswap, which we no longer
5
perform during qemu_st.
1
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
tcg/arm/tcg-target-con-set.h | 16 ++++++++--------
11
tcg/arm/tcg-target-con-str.h | 5 ++---
12
tcg/arm/tcg-target.c.inc | 23 ++++++++---------------
13
3 files changed, 18 insertions(+), 26 deletions(-)
14
15
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/arm/tcg-target-con-set.h
18
+++ b/tcg/arm/tcg-target-con-set.h
19
@@ -XXX,XX +XXX,XX @@
20
C_O0_I1(r)
21
C_O0_I2(r, r)
22
C_O0_I2(r, rIN)
23
-C_O0_I2(s, s)
24
+C_O0_I2(q, q)
25
C_O0_I2(w, r)
26
-C_O0_I3(s, s, s)
27
-C_O0_I3(S, p, s)
28
+C_O0_I3(q, q, q)
29
+C_O0_I3(Q, p, q)
30
C_O0_I4(r, r, rI, rI)
31
-C_O0_I4(S, p, s, s)
32
-C_O1_I1(r, l)
33
+C_O0_I4(Q, p, q, q)
34
+C_O1_I1(r, q)
35
C_O1_I1(r, r)
36
C_O1_I1(w, r)
37
C_O1_I1(w, w)
38
C_O1_I1(w, wr)
39
C_O1_I2(r, 0, rZ)
40
-C_O1_I2(r, l, l)
41
+C_O1_I2(r, q, q)
42
C_O1_I2(r, r, r)
43
C_O1_I2(r, r, rI)
44
C_O1_I2(r, r, rIK)
45
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wZ)
46
C_O1_I3(w, w, w, w)
47
C_O1_I4(r, r, r, rI, rI)
48
C_O1_I4(r, r, rIN, rIK, 0)
49
-C_O2_I1(e, p, l)
50
-C_O2_I2(e, p, l, l)
51
+C_O2_I1(e, p, q)
52
+C_O2_I2(e, p, q, q)
53
C_O2_I2(r, r, r, r)
54
C_O2_I4(r, r, r, r, rIN, rIK)
55
C_O2_I4(r, r, rI, rI, rIN, rIK)
56
diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
57
index XXXXXXX..XXXXXXX 100644
58
--- a/tcg/arm/tcg-target-con-str.h
59
+++ b/tcg/arm/tcg-target-con-str.h
60
@@ -XXX,XX +XXX,XX @@
61
*/
62
REGS('e', ALL_GENERAL_REGS & 0x5555) /* even regs */
63
REGS('r', ALL_GENERAL_REGS)
64
-REGS('l', ALL_QLOAD_REGS)
65
-REGS('s', ALL_QSTORE_REGS)
66
-REGS('S', ALL_QSTORE_REGS & 0x5555) /* even qstore */
67
+REGS('q', ALL_QLDST_REGS)
68
+REGS('Q', ALL_QLDST_REGS & 0x5555) /* even qldst */
69
REGS('w', ALL_VECTOR_REGS)
70
71
/*
72
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
73
index XXXXXXX..XXXXXXX 100644
74
--- a/tcg/arm/tcg-target.c.inc
75
+++ b/tcg/arm/tcg-target.c.inc
76
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
77
#define ALL_VECTOR_REGS 0xffff0000u
78
79
/*
80
- * r0-r2 will be overwritten when reading the tlb entry (softmmu only)
81
- * and r0-r1 doing the byte swapping, so don't use these.
82
- * r3 is removed for softmmu to avoid clashes with helper arguments.
83
+ * r0-r3 will be overwritten when reading the tlb entry (softmmu only);
84
+ * r14 will be overwritten by the BLNE branching to the slow path.
85
*/
86
#ifdef CONFIG_SOFTMMU
87
-#define ALL_QLOAD_REGS \
88
+#define ALL_QLDST_REGS \
89
(ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
90
(1 << TCG_REG_R2) | (1 << TCG_REG_R3) | \
91
(1 << TCG_REG_R14)))
92
-#define ALL_QSTORE_REGS \
93
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
94
- (1 << TCG_REG_R2) | (1 << TCG_REG_R14) | \
95
- ((TARGET_LONG_BITS == 64) << TCG_REG_R3)))
96
#else
97
-#define ALL_QLOAD_REGS ALL_GENERAL_REGS
98
-#define ALL_QSTORE_REGS \
99
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1)))
100
+#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R14))
101
#endif
102
103
/*
104
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
105
return C_O1_I4(r, r, r, rI, rI);
106
107
case INDEX_op_qemu_ld_i32:
108
- return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
109
+ return TARGET_LONG_BITS == 32 ? C_O1_I1(r, q) : C_O1_I2(r, q, q);
110
case INDEX_op_qemu_ld_i64:
111
- return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, l) : C_O2_I2(e, p, l, l);
112
+ return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, q) : C_O2_I2(e, p, q, q);
113
case INDEX_op_qemu_st_i32:
114
- return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
115
+ return TARGET_LONG_BITS == 32 ? C_O0_I2(q, q) : C_O0_I3(q, q, q);
116
case INDEX_op_qemu_st_i64:
117
- return TARGET_LONG_BITS == 32 ? C_O0_I3(S, p, s) : C_O0_I4(S, p, s, s);
118
+ return TARGET_LONG_BITS == 32 ? C_O0_I3(Q, p, q) : C_O0_I4(Q, p, q, q);
119
120
case INDEX_op_st_vec:
121
return C_O0_I2(w, r);
122
--
123
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/arm/tcg-target.c.inc | 45 ----------------------------------------
9
1 file changed, 45 deletions(-)
10
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/arm/tcg-target.c.inc
14
+++ b/tcg/arm/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
bool index_scratch;
17
} HostAddress;
18
19
-#ifdef CONFIG_SOFTMMU
20
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
21
{
22
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
31
- return false;
32
- }
33
-
34
- if (TARGET_LONG_BITS == 64) {
35
- /* 64-bit target address is aligned into R2:R3. */
36
- TCGMovExtend ext[2] = {
37
- { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
38
- .src = l->addrlo_reg,
39
- .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
40
- { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
41
- .src = l->addrhi_reg,
42
- .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
43
- };
44
- tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
45
- } else {
46
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
47
- }
48
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0);
49
-
50
- /*
51
- * Tail call to the helper, with the return address back inline,
52
- * just for the clarity of the debugging traceback -- the helper
53
- * cannot return. We have used BLNE to arrive here, so LR is
54
- * already set.
55
- */
56
- tcg_out_goto(s, COND_AL, (const void *)
57
- (l->is_ld ? helper_unaligned_ld : helper_unaligned_st));
58
- return true;
59
-}
60
-
61
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
62
-{
63
- return tcg_out_fail_alignment(s, l);
64
-}
65
-
66
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
67
-{
68
- return tcg_out_fail_alignment(s, l);
69
-}
70
-#endif /* SOFTMMU */
71
72
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
73
TCGReg addrlo, TCGReg addrhi,
74
--
75
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/mips/tcg-target.c.inc | 57 ++-------------------------------------
9
1 file changed, 2 insertions(+), 55 deletions(-)
10
11
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/mips/tcg-target.c.inc
14
+++ b/tcg/mips/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
16
tcg_out_nop(s);
17
}
18
19
-#if defined(CONFIG_SOFTMMU)
20
/* We have four temps, we might as well expose three of them. */
21
static const TCGLdstHelperParam ldst_helper_param = {
22
.ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
25
/* resolve label address */
26
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
27
- || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
28
- && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
29
+ || (l->label_ptr[1] && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
30
return false;
31
}
32
33
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
34
35
/* resolve label address */
36
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
37
- || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
38
- && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
39
+ || (l->label_ptr[1] && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
40
return false;
41
}
42
43
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
44
return true;
45
}
46
47
-#else
48
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
49
-{
50
- void *target;
51
-
52
- if (!reloc_pc16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
53
- return false;
54
- }
55
-
56
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
57
- /* A0 is env, A1 is skipped, A2:A3 is the uint64_t address. */
58
- TCGReg a2 = MIPS_BE ? l->addrhi_reg : l->addrlo_reg;
59
- TCGReg a3 = MIPS_BE ? l->addrlo_reg : l->addrhi_reg;
60
-
61
- if (a3 != TCG_REG_A2) {
62
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2);
63
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3);
64
- } else if (a2 != TCG_REG_A3) {
65
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3);
66
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2);
67
- } else {
68
- tcg_out_mov(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A2);
69
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, TCG_REG_A3);
70
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, TCG_TMP0);
71
- }
72
- } else {
73
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
74
- }
75
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
76
-
77
- /*
78
- * Tail call to the helper, with the return address back inline.
79
- * We have arrived here via BNEL, so $31 is already set.
80
- */
81
- target = (l->is_ld ? helper_unaligned_ld : helper_unaligned_st);
82
- tcg_out_call_int(s, target, true);
83
- return true;
84
-}
85
-
86
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
87
-{
88
- return tcg_out_fail_alignment(s, l);
89
-}
90
-
91
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
92
-{
93
- return tcg_out_fail_alignment(s, l);
94
-}
95
-#endif /* SOFTMMU */
96
-
97
typedef struct {
98
TCGReg base;
99
MemOp align;
100
--
101
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/s390x/tcg-target.c.inc | 29 -----------------------------
9
1 file changed, 29 deletions(-)
10
11
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/s390x/tcg-target.c.inc
14
+++ b/tcg/s390x/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
16
}
17
}
18
19
-#if defined(CONFIG_SOFTMMU)
20
static const TCGLdstHelperParam ldst_helper_param = {
21
.ntmp = 1, .tmp = { TCG_TMP0 }
22
};
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
31
- (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
32
- return false;
33
- }
34
-
35
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
37
-
38
- /* "Tail call" to the helper, with the return address back inline. */
39
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
40
- tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
41
- : helper_unaligned_st));
42
- return true;
43
-}
44
-
45
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
-{
47
- return tcg_out_fail_alignment(s, l);
48
-}
49
-
50
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
-{
52
- return tcg_out_fail_alignment(s, l);
53
-}
54
-#endif /* CONFIG_SOFTMMU */
55
56
/*
57
* For softmmu, perform the TLB load and compare.
58
--
59
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/sparc64/tcg-target.c.inc | 15 +++++++--------
5
1 file changed, 7 insertions(+), 8 deletions(-)
1
6
7
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/sparc64/tcg-target.c.inc
10
+++ b/tcg/sparc64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
12
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
13
#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
14
15
-/* Define some temporary registers. T2 is used for constant generation. */
16
+/* Define some temporary registers. T3 is used for constant generation. */
17
#define TCG_REG_T1 TCG_REG_G1
18
-#define TCG_REG_T2 TCG_REG_O7
19
+#define TCG_REG_T2 TCG_REG_G2
20
+#define TCG_REG_T3 TCG_REG_O7
21
22
#ifndef CONFIG_SOFTMMU
23
# define TCG_GUEST_BASE_REG TCG_REG_I5
24
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
25
TCG_REG_I4,
26
TCG_REG_I5,
27
28
- TCG_REG_G2,
29
TCG_REG_G3,
30
TCG_REG_G4,
31
TCG_REG_G5,
32
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
33
static void tcg_out_movi(TCGContext *s, TCGType type,
34
TCGReg ret, tcg_target_long arg)
35
{
36
- tcg_debug_assert(ret != TCG_REG_T2);
37
- tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2);
38
+ tcg_debug_assert(ret != TCG_REG_T3);
39
+ tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T3);
40
}
41
42
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs)
43
@@ -XXX,XX +XXX,XX @@ static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest,
44
{
45
uintptr_t desti = (uintptr_t)dest;
46
47
- /* Be careful not to clobber %o7 for a tail call. */
48
tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1,
49
- desti & ~0xfff, in_prologue,
50
- tail_call ? TCG_REG_G2 : TCG_REG_O7);
51
+ desti & ~0xfff, in_prologue, TCG_REG_T2);
52
tcg_out_arithi(s, tail_call ? TCG_REG_G0 : TCG_REG_O7,
53
TCG_REG_T1, desti & 0xfff, JMPL);
54
}
55
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
56
tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
57
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
58
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
59
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_T3); /* for internal use */
60
}
61
62
#define ELF_HOST_MACHINE EM_SPARCV9
63
--
64
2.34.1
New patch
1
Emphasize that the constant is signed.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/sparc64/tcg-target.c.inc | 21 +++++++++++----------
7
1 file changed, 11 insertions(+), 10 deletions(-)
8
9
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/sparc64/tcg-target.c.inc
12
+++ b/tcg/sparc64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
14
tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
15
}
16
17
-static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
18
+/* A 13-bit constant sign-extended to 64 bits. */
19
+static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
20
{
21
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
22
}
23
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
24
{
25
if (check_fit_i32(arg, 13)) {
26
/* A 13-bit constant sign-extended to 64-bits. */
27
- tcg_out_movi_imm13(s, ret, arg);
28
+ tcg_out_movi_s13(s, ret, arg);
29
} else {
30
/* A 32-bit constant zero-extended to 64 bits. */
31
tcg_out_sethi(s, ret, arg);
32
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
33
34
/* A 13-bit constant sign-extended to 64-bits. */
35
if (check_fit_tl(arg, 13)) {
36
- tcg_out_movi_imm13(s, ret, arg);
37
+ tcg_out_movi_s13(s, ret, arg);
38
return;
39
}
40
41
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
42
43
default:
44
tcg_out_cmp(s, c1, c2, c2const);
45
- tcg_out_movi_imm13(s, ret, 0);
46
+ tcg_out_movi_s13(s, ret, 0);
47
tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
48
return;
49
}
50
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
51
/* For 64-bit signed comparisons vs zero, we can avoid the compare
52
if the input does not overlap the output. */
53
if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
54
- tcg_out_movi_imm13(s, ret, 0);
55
+ tcg_out_movi_s13(s, ret, 0);
56
tcg_out_movr(s, cond, ret, c1, 1, 1);
57
} else {
58
tcg_out_cmp(s, c1, c2, c2const);
59
- tcg_out_movi_imm13(s, ret, 0);
60
+ tcg_out_movi_s13(s, ret, 0);
61
tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
62
}
63
}
64
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
65
if (use_vis3_instructions && !is_sub) {
66
/* Note that ADDXC doesn't accept immediates. */
67
if (bhconst && bh != 0) {
68
- tcg_out_movi_imm13(s, TCG_REG_T2, bh);
69
+ tcg_out_movi_s13(s, TCG_REG_T2, bh);
70
bh = TCG_REG_T2;
71
}
72
tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
73
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
74
* so the adjustment fits 12 bits.
75
*/
76
if (bhconst) {
77
- tcg_out_movi_imm13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1));
78
+ tcg_out_movi_s13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1));
79
} else {
80
tcg_out_arithi(s, TCG_REG_T2, bh, 1,
81
is_sub ? ARITH_SUB : ARITH_ADD);
82
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
83
tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
84
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
85
/* delay slot */
86
- tcg_out_movi_imm13(s, TCG_REG_O0, 0);
87
+ tcg_out_movi_s13(s, TCG_REG_O0, 0);
88
89
build_trampolines(s);
90
}
91
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
92
{
93
if (check_fit_ptr(a0, 13)) {
94
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
95
- tcg_out_movi_imm13(s, TCG_REG_O0, a0);
96
+ tcg_out_movi_s13(s, TCG_REG_O0, a0);
97
return;
98
} else {
99
intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0);
100
--
101
2.34.1
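
On the naming: a constant "fits s13" when it lies in the signed 13-bit
range tested by check_fit_i32/check_fit_tl(arg, 13). A sketch of the
equivalent predicate (demo_fits_s13 is an illustrative name):

    #include <stdbool.h>
    #include <stdint.h>

    /* True if val is representable as a sign-extended 13-bit immediate. */
    static bool demo_fits_s13(int64_t val)
    {
        return val >= -0x1000 && val <= 0xfff;   /* [-4096, 4095] */
    }
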
New patch
1
Shuffle the order in tcg_out_movi_int to check s13 first, and
2
drop this check from tcg_out_movi_imm32. This might make the
3
sequence for in_prologue larger, but that is not worth worrying about.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/sparc64/tcg-target.c.inc | 25 ++++++++++---------------
9
1 file changed, 10 insertions(+), 15 deletions(-)
10
11
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/sparc64/tcg-target.c.inc
14
+++ b/tcg/sparc64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
16
17
static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
18
{
19
- if (check_fit_i32(arg, 13)) {
20
- /* A 13-bit constant sign-extended to 64-bits. */
21
- tcg_out_movi_s13(s, ret, arg);
22
- } else {
23
- /* A 32-bit constant zero-extended to 64 bits. */
24
- tcg_out_sethi(s, ret, arg);
25
- if (arg & 0x3ff) {
26
- tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
27
- }
28
+ /* A 32-bit constant zero-extended to 64 bits. */
29
+ tcg_out_sethi(s, ret, arg);
30
+ if (arg & 0x3ff) {
31
+ tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
32
}
33
}
34
35
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
36
tcg_target_long hi, lo = (int32_t)arg;
37
tcg_target_long test, lsb;
38
39
- /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
40
- if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
41
- tcg_out_movi_imm32(s, ret, arg);
42
- return;
43
- }
44
-
45
/* A 13-bit constant sign-extended to 64-bits. */
46
if (check_fit_tl(arg, 13)) {
47
tcg_out_movi_s13(s, ret, arg);
48
return;
49
}
50
51
+ /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
52
+ if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
53
+ tcg_out_movi_imm32(s, ret, arg);
54
+ return;
55
+ }
56
+
57
/* A 13-bit constant relative to the TB. */
58
if (!in_prologue) {
59
test = tcg_tbrel_diff(s, (void *)arg);
60
--
61
2.34.1
New patch
1
Emphasize that the constant is unsigned.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/sparc64/tcg-target.c.inc | 12 ++++++------
7
1 file changed, 6 insertions(+), 6 deletions(-)
8
9
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/sparc64/tcg-target.c.inc
12
+++ b/tcg/sparc64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
14
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
15
}
16
17
-static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
18
+/* A 32-bit constant zero-extended to 64 bits. */
19
+static void tcg_out_movi_u32(TCGContext *s, TCGReg ret, uint32_t arg)
20
{
21
- /* A 32-bit constant zero-extended to 64 bits. */
22
tcg_out_sethi(s, ret, arg);
23
if (arg & 0x3ff) {
24
tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
26
27
/* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
28
if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
29
- tcg_out_movi_imm32(s, ret, arg);
30
+ tcg_out_movi_u32(s, ret, arg);
31
return;
32
}
33
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
35
/* A 64-bit constant decomposed into 2 32-bit pieces. */
36
if (check_fit_i32(lo, 13)) {
37
hi = (arg - lo) >> 32;
38
- tcg_out_movi_imm32(s, ret, hi);
39
+ tcg_out_movi_u32(s, ret, hi);
40
tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
41
tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
42
} else {
43
hi = arg >> 32;
44
- tcg_out_movi_imm32(s, ret, hi);
45
- tcg_out_movi_imm32(s, scratch, lo);
46
+ tcg_out_movi_u32(s, ret, hi);
47
+ tcg_out_movi_u32(s, scratch, lo);
48
tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
49
tcg_out_arith(s, ret, ret, scratch, ARITH_OR);
50
}
51
--
52
2.34.1
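
The zero-extended 32-bit case relies on SETHI depositing bits 31..10 and
an OR immediate supplying bits 9..0; a small sketch of that split
(demo_sethi_or_split is an illustrative name):

    #include <assert.h>
    #include <stdint.h>

    static void demo_sethi_or_split(uint32_t arg)
    {
        uint32_t hi22 = arg & 0xfffffc00;   /* what SETHI %hi(arg) deposits */
        uint32_t lo10 = arg & 0x3ff;        /* what OR %lo(arg) adds, if nonzero */

        assert((hi22 | lo10) == arg);
    }
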
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/sparc64/tcg-target.c.inc | 10 ++++++++--
5
1 file changed, 8 insertions(+), 2 deletions(-)
1
6
7
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/sparc64/tcg-target.c.inc
10
+++ b/tcg/sparc64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
12
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
13
}
14
15
+/* A 32-bit constant sign-extended to 64 bits. */
16
+static void tcg_out_movi_s32(TCGContext *s, TCGReg ret, int32_t arg)
17
+{
18
+ tcg_out_sethi(s, ret, ~arg);
19
+ tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
20
+}
21
+
22
/* A 32-bit constant zero-extended to 64 bits. */
23
static void tcg_out_movi_u32(TCGContext *s, TCGReg ret, uint32_t arg)
24
{
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
26
27
/* A 32-bit constant sign-extended to 64-bits. */
28
if (arg == lo) {
29
- tcg_out_sethi(s, ret, ~arg);
30
- tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
31
+ tcg_out_movi_s32(s, ret, arg);
32
return;
33
}
34
35
--
36
2.34.1
New patch
1
Drop the target-specific trampolines for the standard slow path.
2
This lets us use tcg_out_helper_{ld,st}_args, and handles the new
3
atomicity bits within MemOp.
1
4
5
At the same time, use the full load/store helpers for user-only mode.
6
Drop inline unaligned access support for user-only mode, as it does
7
not handle atomicity.
8
9
Use TCG_REG_T[1-3] in the tlb lookup, instead of TCG_REG_O[0-2].
10
This allows the constraints to be simplified.
11
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
---
15
tcg/sparc64/tcg-target-con-set.h | 2 -
16
tcg/sparc64/tcg-target-con-str.h | 1 -
17
tcg/sparc64/tcg-target.h | 1 +
18
tcg/sparc64/tcg-target.c.inc | 610 +++++++++----------------------
19
4 files changed, 182 insertions(+), 432 deletions(-)
20
21
diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h
22
index XXXXXXX..XXXXXXX 100644
23
--- a/tcg/sparc64/tcg-target-con-set.h
24
+++ b/tcg/sparc64/tcg-target-con-set.h
25
@@ -XXX,XX +XXX,XX @@
26
C_O0_I1(r)
27
C_O0_I2(rZ, r)
28
C_O0_I2(rZ, rJ)
29
-C_O0_I2(sZ, s)
30
-C_O1_I1(r, s)
31
C_O1_I1(r, r)
32
C_O1_I2(r, r, r)
33
C_O1_I2(r, rZ, rJ)
34
diff --git a/tcg/sparc64/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/tcg/sparc64/tcg-target-con-str.h
37
+++ b/tcg/sparc64/tcg-target-con-str.h
38
@@ -XXX,XX +XXX,XX @@
39
* REGS(letter, register_mask)
40
*/
41
REGS('r', ALL_GENERAL_REGS)
42
-REGS('s', ALL_QLDST_REGS)
43
44
/*
45
* Define constraint letters for constants:
46
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
47
index XXXXXXX..XXXXXXX 100644
48
--- a/tcg/sparc64/tcg-target.h
49
+++ b/tcg/sparc64/tcg-target.h
50
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
51
52
#define TCG_TARGET_DEFAULT_MO (0)
53
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
54
+#define TCG_TARGET_NEED_LDST_LABELS
55
#define TCG_TARGET_NEED_POOL_LABELS
56
57
#endif
58
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
59
index XXXXXXX..XXXXXXX 100644
60
--- a/tcg/sparc64/tcg-target.c.inc
61
+++ b/tcg/sparc64/tcg-target.c.inc
62
@@ -XXX,XX +XXX,XX @@
63
#error "unsupported code generation mode"
64
#endif
65
66
+#include "../tcg-ldst.c.inc"
67
#include "../tcg-pool.c.inc"
68
69
#ifdef CONFIG_DEBUG_TCG
70
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
71
#define TCG_CT_CONST_S13 0x200
72
#define TCG_CT_CONST_ZERO 0x400
73
74
-/*
75
- * For softmmu, we need to avoid conflicts with the first 3
76
- * argument registers to perform the tlb lookup, and to call
77
- * the helper function.
78
- */
79
-#ifdef CONFIG_SOFTMMU
80
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_O0, 3)
81
-#else
82
-#define SOFTMMU_RESERVE_REGS 0
83
-#endif
84
-#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
85
-#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
86
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
87
88
/* Define some temporary registers. T3 is used for constant generation. */
89
#define TCG_REG_T1 TCG_REG_G1
90
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
91
tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL));
92
}
93
94
-#ifdef CONFIG_SOFTMMU
95
-static const tcg_insn_unit *qemu_ld_trampoline[MO_SSIZE + 1];
96
-static const tcg_insn_unit *qemu_st_trampoline[MO_SIZE + 1];
97
-
98
-static void build_trampolines(TCGContext *s)
99
-{
100
- int i;
101
-
102
- for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
103
- if (qemu_ld_helpers[i] == NULL) {
104
- continue;
105
- }
106
-
107
- /* May as well align the trampoline. */
108
- while ((uintptr_t)s->code_ptr & 15) {
109
- tcg_out_nop(s);
110
- }
111
- qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
112
-
113
- /* Set the retaddr operand. */
114
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7);
115
- /* Tail call. */
116
- tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true);
117
- /* delay slot -- set the env argument */
118
- tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
119
- }
120
-
121
- for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) {
122
- if (qemu_st_helpers[i] == NULL) {
123
- continue;
124
- }
125
-
126
- /* May as well align the trampoline. */
127
- while ((uintptr_t)s->code_ptr & 15) {
128
- tcg_out_nop(s);
129
- }
130
- qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
131
-
132
- /* Set the retaddr operand. */
133
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7);
134
-
135
- /* Tail call. */
136
- tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true);
137
- /* delay slot -- set the env argument */
138
- tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
139
- }
140
-}
141
-#else
142
-static const tcg_insn_unit *qemu_unalign_ld_trampoline;
143
-static const tcg_insn_unit *qemu_unalign_st_trampoline;
144
-
145
-static void build_trampolines(TCGContext *s)
146
-{
147
- for (int ld = 0; ld < 2; ++ld) {
148
- void *helper;
149
-
150
- while ((uintptr_t)s->code_ptr & 15) {
151
- tcg_out_nop(s);
152
- }
153
-
154
- if (ld) {
155
- helper = helper_unaligned_ld;
156
- qemu_unalign_ld_trampoline = tcg_splitwx_to_rx(s->code_ptr);
157
- } else {
158
- helper = helper_unaligned_st;
159
- qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr);
160
- }
161
-
162
- /* Tail call. */
163
- tcg_out_jmpl_const(s, helper, true, true);
164
- /* delay slot -- set the env argument */
165
- tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
166
- }
167
-}
168
-#endif
169
-
170
/* Generate global QEMU prologue and epilogue code */
171
static void tcg_target_qemu_prologue(TCGContext *s)
172
{
173
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
174
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
175
/* delay slot */
176
tcg_out_movi_s13(s, TCG_REG_O0, 0);
177
-
178
- build_trampolines(s);
179
}
180
181
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
182
@@ -XXX,XX +XXX,XX @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
183
}
184
}
185
186
-#if defined(CONFIG_SOFTMMU)
187
+static const TCGLdstHelperParam ldst_helper_param = {
188
+ .ntmp = 1, .tmp = { TCG_REG_T1 }
189
+};
190
191
-/* We expect to use a 13-bit negative offset from ENV. */
192
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
193
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
194
-
195
-/* Perform the TLB load and compare.
196
-
197
- Inputs:
198
- ADDRLO and ADDRHI contain the possible two parts of the address.
199
-
200
- MEM_INDEX and S_BITS are the memory context and log2 size of the load.
201
-
202
- WHICH is the offset into the CPUTLBEntry structure of the slot to read.
203
- This should be offsetof addr_read or addr_write.
204
-
205
- The result of the TLB comparison is in %[ix]cc. The sanitized address
206
- is in the returned register, maybe %o0. The TLB addend is in %o1. */
207
-
208
-static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
209
- MemOp opc, int which)
210
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
211
{
212
+ MemOp opc = get_memop(lb->oi);
213
+ MemOp sgn;
214
+
215
+ if (!patch_reloc(lb->label_ptr[0], R_SPARC_WDISP19,
216
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 0)) {
217
+ return false;
218
+ }
219
+
220
+ /* Use inline tcg_out_ext32s; otherwise let the helper sign-extend. */
221
+ sgn = (opc & MO_SIZE) < MO_32 ? MO_SIGN : 0;
222
+
223
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
224
+ tcg_out_call(s, qemu_ld_helpers[opc & (MO_SIZE | sgn)], NULL);
225
+ tcg_out_ld_helper_ret(s, lb, sgn, &ldst_helper_param);
226
+
227
+ tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
228
+ return patch_reloc(s->code_ptr - 1, R_SPARC_WDISP19,
229
+ (intptr_t)lb->raddr, 0);
230
+}
231
+
232
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
233
+{
234
+ MemOp opc = get_memop(lb->oi);
235
+
236
+ if (!patch_reloc(lb->label_ptr[0], R_SPARC_WDISP19,
237
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 0)) {
238
+ return false;
239
+ }
240
+
241
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
242
+ tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE], NULL);
243
+
244
+ tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
245
+ return patch_reloc(s->code_ptr - 1, R_SPARC_WDISP19,
246
+ (intptr_t)lb->raddr, 0);
247
+}
248
+
249
+typedef struct {
250
+ TCGReg base;
251
+ TCGReg index;
252
+} HostAddress;
253
+
254
+/*
255
+ * For softmmu, perform the TLB load and compare.
256
+ * For useronly, perform any required alignment tests.
257
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
258
+ * is required and fill in @h with the host address for the fast path.
259
+ */
260
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
261
+ TCGReg addr_reg, MemOpIdx oi,
262
+ bool is_ld)
263
+{
264
+ TCGLabelQemuLdst *ldst = NULL;
265
+ MemOp opc = get_memop(oi);
266
+ unsigned a_bits = get_alignment_bits(opc);
267
+ unsigned s_bits = opc & MO_SIZE;
268
+ unsigned a_mask;
269
+
270
+ /* We don't support unaligned accesses. */
271
+ a_bits = MAX(a_bits, s_bits);
272
+ a_mask = (1u << a_bits) - 1;
273
+
274
+#ifdef CONFIG_SOFTMMU
275
+ int mem_index = get_mmuidx(oi);
276
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
277
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
278
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
279
- const TCGReg r0 = TCG_REG_O0;
280
- const TCGReg r1 = TCG_REG_O1;
281
- const TCGReg r2 = TCG_REG_O2;
282
- unsigned s_bits = opc & MO_SIZE;
283
- unsigned a_bits = get_alignment_bits(opc);
284
- tcg_target_long compare_mask;
285
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
286
+ : offsetof(CPUTLBEntry, addr_write);
287
+ int add_off = offsetof(CPUTLBEntry, addend);
288
+ int compare_mask;
289
+ int cc;
290
291
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
292
- tcg_out_ld(s, TCG_TYPE_PTR, r0, TCG_AREG0, mask_off);
293
- tcg_out_ld(s, TCG_TYPE_PTR, r1, TCG_AREG0, table_off);
294
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
295
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
296
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T2, TCG_AREG0, mask_off);
297
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T3, TCG_AREG0, table_off);
298
299
/* Extract the page index, shifted into place for tlb index. */
300
- tcg_out_arithi(s, r2, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
301
- SHIFT_SRL);
302
- tcg_out_arith(s, r2, r2, r0, ARITH_AND);
303
+ tcg_out_arithi(s, TCG_REG_T1, addr_reg,
304
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
305
+ tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T2, ARITH_AND);
306
307
/* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */
308
- tcg_out_arith(s, r2, r2, r1, ARITH_ADD);
309
+ tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T3, ARITH_ADD);
310
311
- /* Load the tlb comparator and the addend. */
312
- tcg_out_ld(s, TCG_TYPE_TL, r0, r2, which);
313
- tcg_out_ld(s, TCG_TYPE_PTR, r1, r2, offsetof(CPUTLBEntry, addend));
314
+ /* Load the tlb comparator and the addend. */
315
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_T2, TCG_REG_T1, cmp_off);
316
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T1, TCG_REG_T1, add_off);
317
+ h->base = TCG_REG_T1;
318
319
- /* Mask out the page offset, except for the required alignment.
320
- We don't support unaligned accesses. */
321
- if (a_bits < s_bits) {
322
- a_bits = s_bits;
323
- }
324
- compare_mask = (tcg_target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
325
+ /* Mask out the page offset, except for the required alignment. */
326
+ compare_mask = TARGET_PAGE_MASK | a_mask;
327
if (check_fit_tl(compare_mask, 13)) {
328
- tcg_out_arithi(s, r2, addr, compare_mask, ARITH_AND);
329
+ tcg_out_arithi(s, TCG_REG_T3, addr_reg, compare_mask, ARITH_AND);
330
} else {
331
- tcg_out_movi(s, TCG_TYPE_TL, r2, compare_mask);
332
- tcg_out_arith(s, r2, addr, r2, ARITH_AND);
333
+ tcg_out_movi_s32(s, TCG_REG_T3, compare_mask);
334
+ tcg_out_arith(s, TCG_REG_T3, addr_reg, TCG_REG_T3, ARITH_AND);
335
}
336
- tcg_out_cmp(s, r0, r2, 0);
337
+ tcg_out_cmp(s, TCG_REG_T2, TCG_REG_T3, 0);
338
339
- /* If the guest address must be zero-extended, do so now. */
340
+ ldst = new_ldst_label(s);
341
+ ldst->is_ld = is_ld;
342
+ ldst->oi = oi;
343
+ ldst->addrlo_reg = addr_reg;
344
+ ldst->label_ptr[0] = s->code_ptr;
345
+
346
+ /* bne,pn %[xi]cc, label0 */
347
+ cc = TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC;
348
+ tcg_out_bpcc0(s, COND_NE, BPCC_PN | cc, 0);
349
+#else
350
+ if (a_bits != s_bits) {
351
+ /*
352
+ * Test for at least natural alignment, and defer
353
+ * everything else to the helper functions.
354
+ */
355
+ tcg_debug_assert(check_fit_tl(a_mask, 13));
356
+ tcg_out_arithi(s, TCG_REG_G0, addr_reg, a_mask, ARITH_ANDCC);
357
+
358
+ ldst = new_ldst_label(s);
359
+ ldst->is_ld = is_ld;
360
+ ldst->oi = oi;
361
+ ldst->addrlo_reg = addr_reg;
362
+ ldst->label_ptr[0] = s->code_ptr;
363
+
364
+ /* bne,pn %icc, label0 */
365
+ tcg_out_bpcc0(s, COND_NE, BPCC_PN | BPCC_ICC, 0);
366
+ }
367
+ h->base = guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0;
368
+#endif
369
+
370
+ /* If the guest address must be zero-extended, do in the delay slot. */
371
if (TARGET_LONG_BITS == 32) {
372
- tcg_out_ext32u(s, r0, addr);
373
- return r0;
374
+ tcg_out_ext32u(s, TCG_REG_T2, addr_reg);
375
+ h->index = TCG_REG_T2;
376
+ } else {
377
+ if (ldst) {
378
+ tcg_out_nop(s);
379
+ }
380
+ h->index = addr_reg;
381
}
382
- return addr;
383
+ return ldst;
384
}
385
-#endif /* CONFIG_SOFTMMU */
386
-
387
-static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
388
- [MO_UB] = LDUB,
389
- [MO_SB] = LDSB,
390
- [MO_UB | MO_LE] = LDUB,
391
- [MO_SB | MO_LE] = LDSB,
392
-
393
- [MO_BEUW] = LDUH,
394
- [MO_BESW] = LDSH,
395
- [MO_BEUL] = LDUW,
396
- [MO_BESL] = LDSW,
397
- [MO_BEUQ] = LDX,
398
- [MO_BESQ] = LDX,
399
-
400
- [MO_LEUW] = LDUH_LE,
401
- [MO_LESW] = LDSH_LE,
402
- [MO_LEUL] = LDUW_LE,
403
- [MO_LESL] = LDSW_LE,
404
- [MO_LEUQ] = LDX_LE,
405
- [MO_LESQ] = LDX_LE,
406
-};
407
-
408
-static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
409
- [MO_UB] = STB,
410
-
411
- [MO_BEUW] = STH,
412
- [MO_BEUL] = STW,
413
- [MO_BEUQ] = STX,
414
-
415
- [MO_LEUW] = STH_LE,
416
- [MO_LEUL] = STW_LE,
417
- [MO_LEUQ] = STX_LE,
418
-};
419
420
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
421
MemOpIdx oi, TCGType data_type)
422
{
423
- MemOp memop = get_memop(oi);
424
- tcg_insn_unit *label_ptr;
425
+ static const int ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
426
+ [MO_UB] = LDUB,
427
+ [MO_SB] = LDSB,
428
+ [MO_UB | MO_LE] = LDUB,
429
+ [MO_SB | MO_LE] = LDSB,
430
431
-#ifdef CONFIG_SOFTMMU
432
- unsigned memi = get_mmuidx(oi);
433
- TCGReg addrz;
434
- const tcg_insn_unit *func;
435
+ [MO_BEUW] = LDUH,
436
+ [MO_BESW] = LDSH,
437
+ [MO_BEUL] = LDUW,
438
+ [MO_BESL] = LDSW,
439
+ [MO_BEUQ] = LDX,
440
+ [MO_BESQ] = LDX,
441
442
- addrz = tcg_out_tlb_load(s, addr, memi, memop,
443
- offsetof(CPUTLBEntry, addr_read));
444
+ [MO_LEUW] = LDUH_LE,
445
+ [MO_LESW] = LDSH_LE,
446
+ [MO_LEUL] = LDUW_LE,
447
+ [MO_LESL] = LDSW_LE,
448
+ [MO_LEUQ] = LDX_LE,
449
+ [MO_LESQ] = LDX_LE,
450
+ };
451
452
- /* The fast path is exactly one insn. Thus we can perform the
453
- entire TLB Hit in the (annulled) delay slot of the branch
454
- over the TLB Miss case. */
455
+ TCGLabelQemuLdst *ldst;
456
+ HostAddress h;
457
458
- /* beq,a,pt %[xi]cc, label0 */
459
- label_ptr = s->code_ptr;
460
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
461
- | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
462
- /* delay slot */
463
- tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
464
- qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
465
+ ldst = prepare_host_addr(s, &h, addr, oi, true);
466
467
- /* TLB Miss. */
468
+ tcg_out_ldst_rr(s, data, h.base, h.index,
469
+ ld_opc[get_memop(oi) & (MO_BSWAP | MO_SSIZE)]);
470
471
- tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
472
-
473
- /* We use the helpers to extend SB and SW data, leaving the case
474
- of SL needing explicit extending below. */
475
- if ((memop & MO_SSIZE) == MO_SL) {
476
- func = qemu_ld_trampoline[MO_UL];
477
- } else {
478
- func = qemu_ld_trampoline[memop & MO_SSIZE];
479
+ if (ldst) {
480
+ ldst->type = data_type;
481
+ ldst->datalo_reg = data;
482
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
483
}
484
- tcg_debug_assert(func != NULL);
485
- tcg_out_call_nodelay(s, func, false);
486
- /* delay slot */
487
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi);
488
-
489
- /* We let the helper sign-extend SB and SW, but leave SL for here. */
490
- if ((memop & MO_SSIZE) == MO_SL) {
491
- tcg_out_ext32s(s, data, TCG_REG_O0);
492
- } else {
493
- tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
494
- }
495
-
496
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
497
-#else
498
- TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0);
499
- unsigned a_bits = get_alignment_bits(memop);
500
- unsigned s_bits = memop & MO_SIZE;
501
- unsigned t_bits;
502
-
503
- if (TARGET_LONG_BITS == 32) {
504
- tcg_out_ext32u(s, TCG_REG_T1, addr);
505
- addr = TCG_REG_T1;
506
- }
507
-
508
- /*
509
- * Normal case: alignment equal to access size.
510
- */
511
- if (a_bits == s_bits) {
512
- tcg_out_ldst_rr(s, data, addr, index,
513
- qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
514
- return;
515
- }
516
-
517
- /*
518
- * Test for at least natural alignment, and assume most accesses
519
- * will be aligned -- perform a straight load in the delay slot.
520
- * This is required to preserve atomicity for aligned accesses.
521
- */
522
- t_bits = MAX(a_bits, s_bits);
523
- tcg_debug_assert(t_bits < 13);
524
- tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
525
-
526
- /* beq,a,pt %icc, label */
527
- label_ptr = s->code_ptr;
528
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
529
- /* delay slot */
530
- tcg_out_ldst_rr(s, data, addr, index,
531
- qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
532
-
533
- if (a_bits >= s_bits) {
534
- /*
535
- * Overalignment: A successful alignment test will perform the memory
536
- * operation in the delay slot, and failure need only invoke the
537
- * handler for SIGBUS.
538
- */
539
- tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false);
540
- /* delay slot -- move to low part of argument reg */
541
- tcg_out_mov_delay(s, TCG_REG_O1, addr);
542
- } else {
543
- /* Underalignment: load by pieces of minimum alignment. */
544
- int ld_opc, a_size, s_size, i;
545
-
546
- /*
547
- * Force full address into T1 early; avoids problems with
548
- * overlap between @addr and @data.
549
- */
550
- tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
551
-
552
- a_size = 1 << a_bits;
553
- s_size = 1 << s_bits;
554
- if ((memop & MO_BSWAP) == MO_BE) {
555
- ld_opc = qemu_ld_opc[a_bits | MO_BE | (memop & MO_SIGN)];
556
- tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
557
- ld_opc = qemu_ld_opc[a_bits | MO_BE];
558
- for (i = a_size; i < s_size; i += a_size) {
559
- tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
560
- tcg_out_arithi(s, data, data, a_size, SHIFT_SLLX);
561
- tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
562
- }
563
- } else if (a_bits == 0) {
564
- ld_opc = LDUB;
565
- tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
566
- for (i = a_size; i < s_size; i += a_size) {
567
- if ((memop & MO_SIGN) && i == s_size - a_size) {
568
- ld_opc = LDSB;
569
- }
570
- tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
571
- tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
572
- tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
573
- }
574
- } else {
575
- ld_opc = qemu_ld_opc[a_bits | MO_LE];
576
- tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, ld_opc);
577
- for (i = a_size; i < s_size; i += a_size) {
578
- tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
579
- if ((memop & MO_SIGN) && i == s_size - a_size) {
580
- ld_opc = qemu_ld_opc[a_bits | MO_LE | MO_SIGN];
581
- }
582
- tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, ld_opc);
583
- tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
584
- tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
585
- }
586
- }
587
- }
588
-
589
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
590
-#endif /* CONFIG_SOFTMMU */
591
}
592
593
static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
594
MemOpIdx oi, TCGType data_type)
595
{
596
- MemOp memop = get_memop(oi);
597
- tcg_insn_unit *label_ptr;
598
+ static const int st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
599
+ [MO_UB] = STB,
600
601
-#ifdef CONFIG_SOFTMMU
602
- unsigned memi = get_mmuidx(oi);
603
- TCGReg addrz;
604
- const tcg_insn_unit *func;
605
+ [MO_BEUW] = STH,
606
+ [MO_BEUL] = STW,
607
+ [MO_BEUQ] = STX,
608
609
- addrz = tcg_out_tlb_load(s, addr, memi, memop,
610
- offsetof(CPUTLBEntry, addr_write));
611
+ [MO_LEUW] = STH_LE,
612
+ [MO_LEUL] = STW_LE,
613
+ [MO_LEUQ] = STX_LE,
614
+ };
615
616
- /* The fast path is exactly one insn. Thus we can perform the entire
617
- TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
618
- /* beq,a,pt %[xi]cc, label0 */
619
- label_ptr = s->code_ptr;
620
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
621
- | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
622
- /* delay slot */
623
- tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
624
- qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
625
+ TCGLabelQemuLdst *ldst;
626
+ HostAddress h;
627
628
- /* TLB Miss. */
629
+ ldst = prepare_host_addr(s, &h, addr, oi, false);
630
631
- tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
632
- tcg_out_movext(s, (memop & MO_SIZE) == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
633
- TCG_REG_O2, data_type, memop & MO_SIZE, data);
634
+ tcg_out_ldst_rr(s, data, h.base, h.index,
635
+ st_opc[get_memop(oi) & (MO_BSWAP | MO_SIZE)]);
636
637
- func = qemu_st_trampoline[memop & MO_SIZE];
638
- tcg_debug_assert(func != NULL);
639
- tcg_out_call_nodelay(s, func, false);
640
- /* delay slot */
641
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi);
642
-
643
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
644
-#else
645
- TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0);
646
- unsigned a_bits = get_alignment_bits(memop);
647
- unsigned s_bits = memop & MO_SIZE;
648
- unsigned t_bits;
649
-
650
- if (TARGET_LONG_BITS == 32) {
651
- tcg_out_ext32u(s, TCG_REG_T1, addr);
652
- addr = TCG_REG_T1;
653
+ if (ldst) {
654
+ ldst->type = data_type;
655
+ ldst->datalo_reg = data;
656
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
657
}
658
-
659
- /*
660
- * Normal case: alignment equal to access size.
661
- */
662
- if (a_bits == s_bits) {
663
- tcg_out_ldst_rr(s, data, addr, index,
664
- qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
665
- return;
666
- }
667
-
668
- /*
669
- * Test for at least natural alignment, and assume most accesses
670
- * will be aligned -- perform a straight store in the delay slot.
671
- * This is required to preserve atomicity for aligned accesses.
672
- */
673
- t_bits = MAX(a_bits, s_bits);
674
- tcg_debug_assert(t_bits < 13);
675
- tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
676
-
677
- /* beq,a,pt %icc, label */
678
- label_ptr = s->code_ptr;
679
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
680
- /* delay slot */
681
- tcg_out_ldst_rr(s, data, addr, index,
682
- qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
683
-
684
- if (a_bits >= s_bits) {
685
- /*
686
- * Overalignment: A successful alignment test will perform the memory
687
- * operation in the delay slot, and failure need only invoke the
688
- * handler for SIGBUS.
689
- */
690
- tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false);
691
- /* delay slot -- move to low part of argument reg */
692
- tcg_out_mov_delay(s, TCG_REG_O1, addr);
693
- } else {
694
- /* Underalignment: store by pieces of minimum alignment. */
695
- int st_opc, a_size, s_size, i;
696
-
697
- /*
698
- * Force full address into T1 early; avoids problems with
699
- * overlap between @addr and @data.
700
- */
701
- tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
702
-
703
- a_size = 1 << a_bits;
704
- s_size = 1 << s_bits;
705
- if ((memop & MO_BSWAP) == MO_BE) {
706
- st_opc = qemu_st_opc[a_bits | MO_BE];
707
- for (i = 0; i < s_size; i += a_size) {
708
- TCGReg d = data;
709
- int shift = (s_size - a_size - i) * 8;
710
- if (shift) {
711
- d = TCG_REG_T2;
712
- tcg_out_arithi(s, d, data, shift, SHIFT_SRLX);
713
- }
714
- tcg_out_ldst(s, d, TCG_REG_T1, i, st_opc);
715
- }
716
- } else if (a_bits == 0) {
717
- tcg_out_ldst(s, data, TCG_REG_T1, 0, STB);
718
- for (i = 1; i < s_size; i++) {
719
- tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX);
720
- tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, STB);
721
- }
722
- } else {
723
- /* Note that ST*A with immediate asi must use indexed address. */
724
- st_opc = qemu_st_opc[a_bits + MO_LE];
725
- tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, st_opc);
726
- for (i = a_size; i < s_size; i += a_size) {
727
- tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX);
728
- tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
729
- tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, st_opc);
730
- }
731
- }
732
- }
733
-
734
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
735
-#endif /* CONFIG_SOFTMMU */
736
}
737
738
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
739
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
740
case INDEX_op_extu_i32_i64:
741
case INDEX_op_extrl_i64_i32:
742
case INDEX_op_extrh_i64_i32:
743
+ case INDEX_op_qemu_ld_i32:
744
+ case INDEX_op_qemu_ld_i64:
745
return C_O1_I1(r, r);
746
747
case INDEX_op_st8_i32:
748
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
749
case INDEX_op_st_i32:
750
case INDEX_op_st32_i64:
751
case INDEX_op_st_i64:
752
+ case INDEX_op_qemu_st_i32:
753
+ case INDEX_op_qemu_st_i64:
754
return C_O0_I2(rZ, r);
755
756
case INDEX_op_add_i32:
757
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
758
case INDEX_op_muluh_i64:
759
return C_O1_I2(r, r, r);
760
761
- case INDEX_op_qemu_ld_i32:
762
- case INDEX_op_qemu_ld_i64:
763
- return C_O1_I1(r, s);
764
- case INDEX_op_qemu_st_i32:
765
- case INDEX_op_qemu_st_i64:
766
- return C_O0_I2(sZ, s);
767
-
768
default:
769
g_assert_not_reached();
770
}
771
--
772
2.34.1
New patch
1
These functions are now unused.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
include/tcg/tcg-ldst.h | 6 ------
7
accel/tcg/user-exec.c | 10 ----------
8
2 files changed, 16 deletions(-)
9
10
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/include/tcg/tcg-ldst.h
13
+++ b/include/tcg/tcg-ldst.h
14
@@ -XXX,XX +XXX,XX @@ void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
15
void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
16
MemOpIdx oi, uintptr_t retaddr);
17
18
-#ifdef CONFIG_USER_ONLY
19
-
20
-G_NORETURN void helper_unaligned_ld(CPUArchState *env, target_ulong addr);
21
-G_NORETURN void helper_unaligned_st(CPUArchState *env, target_ulong addr);
22
-
23
-#endif /* CONFIG_USER_ONLY */
24
#endif /* TCG_LDST_H */
25
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/accel/tcg/user-exec.c
28
+++ b/accel/tcg/user-exec.c
29
@@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong last) { }
30
31
/* The softmmu versions of these helpers are in cputlb.c. */
32
33
-void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
34
-{
35
- cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
36
-}
37
-
38
-void helper_unaligned_st(CPUArchState *env, target_ulong addr)
39
-{
40
- cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
41
-}
42
-
43
static void *cpu_mmu_lookup(CPUArchState *env, abi_ptr addr,
44
MemOp mop, uintptr_t ra, MMUAccessType type)
45
{
46
--
47
2.34.1
New patch
1
This should be true of all loongarch64 hosts running Linux.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/loongarch64/tcg-target.c.inc | 9 +++++++++
7
1 file changed, 9 insertions(+)
8
9
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/loongarch64/tcg-target.c.inc
12
+++ b/tcg/loongarch64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@
14
*/
15
16
#include "../tcg-ldst.c.inc"
17
+#include <asm/hwcap.h>
18
19
#ifdef CONFIG_DEBUG_TCG
20
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
21
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
22
23
static void tcg_target_init(TCGContext *s)
24
{
25
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
26
+
27
+ /* Server and desktop class cpus have UAL; embedded cpus do not. */
28
+ if (!(hwcap & HWCAP_LOONGARCH_UAL)) {
29
+ error_report("TCG: unaligned access support required; exiting");
30
+ exit(EXIT_FAILURE);
31
+ }
32
+
33
tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
34
tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
35
36
--
37
2.34.1
New patch
1
Test the final byte of an unaligned access.
2
Use BSTRINS.D to clear the range of bits, rather than AND.
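
As a rough illustration of the new check, here is a stand-alone C sketch
(assumed 4KiB pages and invented names, not the backend code itself):
testing the last byte of the access means a page-crossing unaligned access
no longer matches the TLB comparison and falls through to the slow path.

#include <inttypes.h>
#include <stdio.h>

#define PAGE_BITS 12

/* Mirror of the comparator computation: point at the last byte for an
 * under-aligned access, then clear bits [a_bits, PAGE_BITS) of the
 * address, which is what BSTRINS.D with $zero achieves in one insn. */
static uint64_t compare_value(uint64_t addr, unsigned a_bits, unsigned s_bits)
{
    uint64_t a_mask = (1u << a_bits) - 1;
    uint64_t s_mask = (1u << s_bits) - 1;
    uint64_t last = (a_bits < s_bits) ? addr + (s_mask - a_mask) : addr;
    uint64_t clear = (((uint64_t)1 << PAGE_BITS) - 1) & ~a_mask;
    return last & ~clear;
}

int main(void)
{
    /* An 8-byte access at 0x1ffd reaches into page 0x2000, so it cannot
     * match a TLB entry for page 0x1000; 0x1ff0 stays within the page. */
    printf("0x%" PRIx64 "\n", compare_value(0x1ffd, 0, 3));   /* 0x2000 */
    printf("0x%" PRIx64 "\n", compare_value(0x1ff0, 0, 3));   /* 0x1000 */
    return 0;
}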
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/loongarch64/tcg-target.c.inc | 19 ++++++++++++-------
8
1 file changed, 12 insertions(+), 7 deletions(-)
9
10
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/loongarch64/tcg-target.c.inc
13
+++ b/tcg/loongarch64/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
15
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
16
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
17
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
18
- tcg_target_long compare_mask;
19
20
ldst = new_ldst_label(s);
21
ldst->is_ld = is_ld;
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
23
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
24
offsetof(CPUTLBEntry, addend));
25
26
- /* We don't support unaligned accesses. */
27
+ /*
28
+ * For aligned accesses, we check the first byte and include the alignment
29
+ * bits within the address. For unaligned access, we check that we don't
30
+ * cross pages using the address of the last byte of the access.
31
+ */
32
if (a_bits < s_bits) {
33
- a_bits = s_bits;
34
+ unsigned a_mask = (1u << a_bits) - 1;
35
+ unsigned s_mask = (1u << s_bits) - 1;
36
+ tcg_out_addi(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
37
+ } else {
38
+ tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg);
39
}
40
- /* Clear the non-page, non-alignment bits from the address. */
41
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
42
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
43
- tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
44
+ tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
45
+ a_bits, TARGET_PAGE_BITS - 1);
46
47
/* Compare masked address with the TLB entry. */
48
ldst->label_ptr[0] = s->code_ptr;
49
--
50
2.34.1
New patch
1
The system is required to emulate unaligned accesses, even if the
2
hardware does not support it. The resulting trap may or may not
3
be more efficient than the QEMU slow path. There are Linux kernel
4
patches in flight to allow userspace to query hardware support;
5
we can re-evaluate whether to enable this by default after that.
1
6
7
In the meantime, softmmu now matches user-only, where we already
8
assumed that unaligned accesses are supported.
9
10
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
tcg/riscv/tcg-target.c.inc | 48 ++++++++++++++++++++++----------------
14
1 file changed, 28 insertions(+), 20 deletions(-)
15
16
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
17
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/riscv/tcg-target.c.inc
19
+++ b/tcg/riscv/tcg-target.c.inc
20
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
21
22
#ifdef CONFIG_SOFTMMU
23
unsigned s_bits = opc & MO_SIZE;
24
+ unsigned s_mask = (1u << s_bits) - 1;
25
int mem_index = get_mmuidx(oi);
26
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
27
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
28
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
29
- TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
30
- tcg_target_long compare_mask;
31
+ int compare_mask;
32
+ TCGReg addr_adj;
33
34
ldst = new_ldst_label(s);
35
ldst->is_ld = is_ld;
36
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
37
38
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
39
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
40
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
41
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
42
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
43
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
44
45
tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
46
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
47
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
48
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
49
50
+ /*
51
+ * For aligned accesses, we check the first byte and include the alignment
52
+ * bits within the address. For unaligned access, we check that we don't
53
+ * cross pages using the address of the last byte of the access.
54
+ */
55
+ addr_adj = addr_reg;
56
+ if (a_bits < s_bits) {
57
+ addr_adj = TCG_REG_TMP0;
58
+ tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
59
+ addr_adj, addr_reg, s_mask - a_mask);
60
+ }
61
+ compare_mask = TARGET_PAGE_MASK | a_mask;
62
+ if (compare_mask == sextreg(compare_mask, 0, 12)) {
63
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
64
+ } else {
65
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
66
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
67
+ }
68
+
69
/* Load the tlb comparator and the addend. */
70
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
71
is_ld ? offsetof(CPUTLBEntry, addr_read)
72
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
73
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
74
offsetof(CPUTLBEntry, addend));
75
76
- /* We don't support unaligned accesses. */
77
- if (a_bits < s_bits) {
78
- a_bits = s_bits;
79
- }
80
- /* Clear the non-page, non-alignment bits from the address. */
81
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | a_mask;
82
- if (compare_mask == sextreg(compare_mask, 0, 12)) {
83
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, compare_mask);
84
- } else {
85
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
86
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
87
- }
88
-
89
/* Compare masked address with the TLB entry. */
90
ldst->label_ptr[0] = s->code_ptr;
91
tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
92
93
/* TLB Hit - translate address using addend. */
94
+ addr_adj = addr_reg;
95
if (TARGET_LONG_BITS == 32) {
96
- tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
97
- addr_reg = TCG_REG_TMP0;
98
+ addr_adj = TCG_REG_TMP0;
99
+ tcg_out_ext32u(s, addr_adj, addr_reg);
100
}
101
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_reg);
102
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_adj);
103
*pbase = TCG_REG_TMP0;
104
#else
105
if (a_mask) {
106
--
107
2.34.1
New patch
1
Replace the unparameterized TCG_TARGET_HAS_MEMORY_BSWAP macro
2
with a function that takes a MemOp argument.
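
For illustration only (a hypothetical backend, not one of the
implementations in this patch), the parameterization lets a host report
byte-swapped memory accesses for some operand sizes but not others, using
MemOp/MO_SIZE/MO_64 from include/exec/memop.h:

bool tcg_target_has_memory_bswap(MemOp memop)
{
    /* Hypothetical: direct byte-swapped accesses up to 64 bits only. */
    return (memop & MO_SIZE) <= MO_64;
}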
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/aarch64/tcg-target.h | 1 -
8
tcg/arm/tcg-target.h | 1 -
9
tcg/i386/tcg-target.h | 3 ---
10
tcg/loongarch64/tcg-target.h | 2 --
11
tcg/mips/tcg-target.h | 2 --
12
tcg/ppc/tcg-target.h | 1 -
13
tcg/riscv/tcg-target.h | 2 --
14
tcg/s390x/tcg-target.h | 2 --
15
tcg/sparc64/tcg-target.h | 1 -
16
tcg/tcg-internal.h | 2 ++
17
tcg/tci/tcg-target.h | 2 --
18
tcg/tcg-op.c | 20 +++++++++++---------
19
tcg/aarch64/tcg-target.c.inc | 5 +++++
20
tcg/arm/tcg-target.c.inc | 5 +++++
21
tcg/i386/tcg-target.c.inc | 5 +++++
22
tcg/loongarch64/tcg-target.c.inc | 5 +++++
23
tcg/mips/tcg-target.c.inc | 5 +++++
24
tcg/ppc/tcg-target.c.inc | 5 +++++
25
tcg/riscv/tcg-target.c.inc | 5 +++++
26
tcg/s390x/tcg-target.c.inc | 5 +++++
27
tcg/sparc64/tcg-target.c.inc | 5 +++++
28
tcg/tci/tcg-target.c.inc | 5 +++++
29
22 files changed, 63 insertions(+), 26 deletions(-)
30
31
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tcg/aarch64/tcg-target.h
34
+++ b/tcg/aarch64/tcg-target.h
35
@@ -XXX,XX +XXX,XX @@ extern bool have_lse2;
36
#define TCG_TARGET_HAS_cmpsel_vec 0
37
38
#define TCG_TARGET_DEFAULT_MO (0)
39
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
40
#define TCG_TARGET_NEED_LDST_LABELS
41
#define TCG_TARGET_NEED_POOL_LABELS
42
43
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
44
index XXXXXXX..XXXXXXX 100644
45
--- a/tcg/arm/tcg-target.h
46
+++ b/tcg/arm/tcg-target.h
47
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
48
#define TCG_TARGET_HAS_cmpsel_vec 0
49
50
#define TCG_TARGET_DEFAULT_MO (0)
51
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
52
#define TCG_TARGET_NEED_LDST_LABELS
53
#define TCG_TARGET_NEED_POOL_LABELS
54
55
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/tcg/i386/tcg-target.h
58
+++ b/tcg/i386/tcg-target.h
59
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
60
#include "tcg/tcg-mo.h"
61
62
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
63
-
64
-#define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe
65
-
66
#define TCG_TARGET_NEED_LDST_LABELS
67
#define TCG_TARGET_NEED_POOL_LABELS
68
69
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
70
index XXXXXXX..XXXXXXX 100644
71
--- a/tcg/loongarch64/tcg-target.h
72
+++ b/tcg/loongarch64/tcg-target.h
73
@@ -XXX,XX +XXX,XX @@ typedef enum {
74
75
#define TCG_TARGET_NEED_LDST_LABELS
76
77
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
78
-
79
#endif /* LOONGARCH_TCG_TARGET_H */
80
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
81
index XXXXXXX..XXXXXXX 100644
82
--- a/tcg/mips/tcg-target.h
83
+++ b/tcg/mips/tcg-target.h
84
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
85
#endif
86
87
#define TCG_TARGET_DEFAULT_MO 0
88
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
89
-
90
#define TCG_TARGET_NEED_LDST_LABELS
91
92
#endif
93
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
94
index XXXXXXX..XXXXXXX 100644
95
--- a/tcg/ppc/tcg-target.h
96
+++ b/tcg/ppc/tcg-target.h
97
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
98
#define TCG_TARGET_HAS_cmpsel_vec 0
99
100
#define TCG_TARGET_DEFAULT_MO (0)
101
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
102
#define TCG_TARGET_NEED_LDST_LABELS
103
#define TCG_TARGET_NEED_POOL_LABELS
104
105
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
106
index XXXXXXX..XXXXXXX 100644
107
--- a/tcg/riscv/tcg-target.h
108
+++ b/tcg/riscv/tcg-target.h
109
@@ -XXX,XX +XXX,XX @@ typedef enum {
110
#define TCG_TARGET_NEED_LDST_LABELS
111
#define TCG_TARGET_NEED_POOL_LABELS
112
113
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
114
-
115
#endif
116
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
117
index XXXXXXX..XXXXXXX 100644
118
--- a/tcg/s390x/tcg-target.h
119
+++ b/tcg/s390x/tcg-target.h
120
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
121
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
122
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
123
124
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
125
-
126
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
127
#define TCG_TARGET_NEED_LDST_LABELS
128
#define TCG_TARGET_NEED_POOL_LABELS
129
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
130
index XXXXXXX..XXXXXXX 100644
131
--- a/tcg/sparc64/tcg-target.h
132
+++ b/tcg/sparc64/tcg-target.h
133
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
134
#define TCG_AREG0 TCG_REG_I0
135
136
#define TCG_TARGET_DEFAULT_MO (0)
137
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
138
#define TCG_TARGET_NEED_LDST_LABELS
139
#define TCG_TARGET_NEED_POOL_LABELS
140
141
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
142
index XXXXXXX..XXXXXXX 100644
143
--- a/tcg/tcg-internal.h
144
+++ b/tcg/tcg-internal.h
145
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t)
146
return temp_tcgv_i64(tcgv_i128_temp(t) + o);
147
}
148
149
+bool tcg_target_has_memory_bswap(MemOp memop);
150
+
151
#endif /* TCG_INTERNAL_H */
152
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
153
index XXXXXXX..XXXXXXX 100644
154
--- a/tcg/tci/tcg-target.h
155
+++ b/tcg/tci/tcg-target.h
156
@@ -XXX,XX +XXX,XX @@ typedef enum {
157
We prefer consistency across hosts on this. */
158
#define TCG_TARGET_DEFAULT_MO (0)
159
160
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
161
-
162
#endif /* TCG_TARGET_H */
163
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
164
index XXXXXXX..XXXXXXX 100644
165
--- a/tcg/tcg-op.c
166
+++ b/tcg/tcg-op.c
167
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
168
oi = make_memop_idx(memop, idx);
169
170
orig_memop = memop;
171
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
172
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
173
memop &= ~MO_BSWAP;
174
/* The bswap primitive benefits from zero-extended input. */
175
if ((memop & MO_SSIZE) == MO_SW) {
176
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
177
memop = tcg_canonicalize_memop(memop, 0, 1);
178
oi = make_memop_idx(memop, idx);
179
180
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
181
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
182
swap = tcg_temp_ebb_new_i32();
183
switch (memop & MO_SIZE) {
184
case MO_16:
185
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
186
oi = make_memop_idx(memop, idx);
187
188
orig_memop = memop;
189
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
190
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
191
memop &= ~MO_BSWAP;
192
/* The bswap primitive benefits from zero-extended input. */
193
if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
194
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
195
memop = tcg_canonicalize_memop(memop, 1, 1);
196
oi = make_memop_idx(memop, idx);
197
198
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
199
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
200
swap = tcg_temp_ebb_new_i64();
201
switch (memop & MO_SIZE) {
202
case MO_16:
203
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
204
tcg_debug_assert((orig & MO_SIZE) == MO_128);
205
tcg_debug_assert((orig & MO_SIGN) == 0);
206
207
- /* Use a memory ordering implemented by the host. */
208
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
209
- mop_1 &= ~MO_BSWAP;
210
- }
211
-
212
/* Reduce the size to 64-bit. */
213
mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
214
215
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
216
default:
217
g_assert_not_reached();
218
}
219
+
220
+ /* Use a memory ordering implemented by the host. */
221
+ if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
222
+ mop_1 &= ~MO_BSWAP;
223
+ mop_2 &= ~MO_BSWAP;
224
+ }
225
+
226
ret[0] = mop_1;
227
ret[1] = mop_2;
228
}
229
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
230
index XXXXXXX..XXXXXXX 100644
231
--- a/tcg/aarch64/tcg-target.c.inc
232
+++ b/tcg/aarch64/tcg-target.c.inc
233
@@ -XXX,XX +XXX,XX @@ typedef struct {
234
TCGType index_ext;
235
} HostAddress;
236
237
+bool tcg_target_has_memory_bswap(MemOp memop)
238
+{
239
+ return false;
240
+}
241
+
242
static const TCGLdstHelperParam ldst_helper_param = {
243
.ntmp = 1, .tmp = { TCG_REG_TMP }
244
};
245
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
246
index XXXXXXX..XXXXXXX 100644
247
--- a/tcg/arm/tcg-target.c.inc
248
+++ b/tcg/arm/tcg-target.c.inc
249
@@ -XXX,XX +XXX,XX @@ typedef struct {
250
bool index_scratch;
251
} HostAddress;
252
253
+bool tcg_target_has_memory_bswap(MemOp memop)
254
+{
255
+ return false;
256
+}
257
+
258
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
259
{
260
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
261
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
262
index XXXXXXX..XXXXXXX 100644
263
--- a/tcg/i386/tcg-target.c.inc
264
+++ b/tcg/i386/tcg-target.c.inc
265
@@ -XXX,XX +XXX,XX @@ typedef struct {
266
int seg;
267
} HostAddress;
268
269
+bool tcg_target_has_memory_bswap(MemOp memop)
270
+{
271
+ return have_movbe;
272
+}
273
+
274
/*
275
* Because i686 has no register parameters and because x86_64 has xchg
276
* to handle addr/data register overlap, we have placed all input arguments
277
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
278
index XXXXXXX..XXXXXXX 100644
279
--- a/tcg/loongarch64/tcg-target.c.inc
280
+++ b/tcg/loongarch64/tcg-target.c.inc
281
@@ -XXX,XX +XXX,XX @@ typedef struct {
282
TCGReg index;
283
} HostAddress;
284
285
+bool tcg_target_has_memory_bswap(MemOp memop)
286
+{
287
+ return false;
288
+}
289
+
290
/*
291
* For softmmu, perform the TLB load and compare.
292
* For useronly, perform any required alignment tests.
293
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
294
index XXXXXXX..XXXXXXX 100644
295
--- a/tcg/mips/tcg-target.c.inc
296
+++ b/tcg/mips/tcg-target.c.inc
297
@@ -XXX,XX +XXX,XX @@ typedef struct {
298
MemOp align;
299
} HostAddress;
300
301
+bool tcg_target_has_memory_bswap(MemOp memop)
302
+{
303
+ return false;
304
+}
305
+
306
/*
307
* For softmmu, perform the TLB load and compare.
308
* For useronly, perform any required alignment tests.
309
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
310
index XXXXXXX..XXXXXXX 100644
311
--- a/tcg/ppc/tcg-target.c.inc
312
+++ b/tcg/ppc/tcg-target.c.inc
313
@@ -XXX,XX +XXX,XX @@ typedef struct {
314
TCGReg index;
315
} HostAddress;
316
317
+bool tcg_target_has_memory_bswap(MemOp memop)
318
+{
319
+ return true;
320
+}
321
+
322
/*
323
* For softmmu, perform the TLB load and compare.
324
* For useronly, perform any required alignment tests.
325
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
326
index XXXXXXX..XXXXXXX 100644
327
--- a/tcg/riscv/tcg-target.c.inc
328
+++ b/tcg/riscv/tcg-target.c.inc
329
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
330
tcg_debug_assert(ok);
331
}
332
333
+bool tcg_target_has_memory_bswap(MemOp memop)
334
+{
335
+ return false;
336
+}
337
+
338
/* We have three temps, we might as well expose them. */
339
static const TCGLdstHelperParam ldst_helper_param = {
340
.ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
341
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
342
index XXXXXXX..XXXXXXX 100644
343
--- a/tcg/s390x/tcg-target.c.inc
344
+++ b/tcg/s390x/tcg-target.c.inc
345
@@ -XXX,XX +XXX,XX @@ typedef struct {
346
int disp;
347
} HostAddress;
348
349
+bool tcg_target_has_memory_bswap(MemOp memop)
350
+{
351
+ return true;
352
+}
353
+
354
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
355
HostAddress h)
356
{
357
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
358
index XXXXXXX..XXXXXXX 100644
359
--- a/tcg/sparc64/tcg-target.c.inc
360
+++ b/tcg/sparc64/tcg-target.c.inc
361
@@ -XXX,XX +XXX,XX @@ typedef struct {
362
TCGReg index;
363
} HostAddress;
364
365
+bool tcg_target_has_memory_bswap(MemOp memop)
366
+{
367
+ return true;
368
+}
369
+
370
/*
371
* For softmmu, perform the TLB load and compare.
372
* For useronly, perform any required alignment tests.
373
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
374
index XXXXXXX..XXXXXXX 100644
375
--- a/tcg/tci/tcg-target.c.inc
376
+++ b/tcg/tci/tcg-target.c.inc
377
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
378
static inline void tcg_target_qemu_prologue(TCGContext *s)
379
{
380
}
381
+
382
+bool tcg_target_has_memory_bswap(MemOp memop)
383
+{
384
+ return true;
385
+}
386
--
387
2.34.1
New patch
1
Add opcodes for backend support for 128-bit memory operations.
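
For context, a hedged sketch of how a front end reaches these opcodes
through the existing API; addr and mmu_idx are assumed to exist, and
whether the new opcode, two i64 operations, or a helper call is emitted
depends on TCG_TARGET_HAS_qemu_ldst_i128 and bswap support:

/* Hypothetical front-end fragment: 16-byte big-endian aligned load. */
TCGv_i128 val = tcg_temp_new_i128();
tcg_gen_qemu_ld_i128(val, addr, mmu_idx, MO_128 | MO_BE | MO_ALIGN);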
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
docs/devel/tcg-ops.rst | 11 +++---
8
include/tcg/tcg-opc.h | 8 +++++
9
tcg/aarch64/tcg-target.h | 2 ++
10
tcg/arm/tcg-target.h | 2 ++
11
tcg/i386/tcg-target.h | 2 ++
12
tcg/loongarch64/tcg-target.h | 1 +
13
tcg/mips/tcg-target.h | 2 ++
14
tcg/ppc/tcg-target.h | 2 ++
15
tcg/riscv/tcg-target.h | 2 ++
16
tcg/s390x/tcg-target.h | 2 ++
17
tcg/sparc64/tcg-target.h | 2 ++
18
tcg/tci/tcg-target.h | 2 ++
19
tcg/tcg-op.c | 69 ++++++++++++++++++++++++++++++++----
20
tcg/tcg.c | 6 ++++
21
14 files changed, 103 insertions(+), 10 deletions(-)
22
23
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst
24
index XXXXXXX..XXXXXXX 100644
25
--- a/docs/devel/tcg-ops.rst
26
+++ b/docs/devel/tcg-ops.rst
27
@@ -XXX,XX +XXX,XX @@ QEMU specific operations
28
| This operation is optional. If the TCG backend does not implement the
29
goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
30
31
- * - qemu_ld_i32/i64 *t0*, *t1*, *flags*, *memidx*
32
+ * - qemu_ld_i32/i64/i128 *t0*, *t1*, *flags*, *memidx*
33
34
- qemu_st_i32/i64 *t0*, *t1*, *flags*, *memidx*
35
+ qemu_st_i32/i64/i128 *t0*, *t1*, *flags*, *memidx*
36
37
qemu_st8_i32 *t0*, *t1*, *flags*, *memidx*
38
39
| Load data at the guest address *t1* into *t0*, or store data in *t0* at guest
40
- address *t1*. The _i32/_i64 size applies to the size of the input/output
41
+ address *t1*. The _i32/_i64/_i128 size applies to the size of the input/output
42
register *t0* only. The address *t1* is always sized according to the guest,
43
and the width of the memory operation is controlled by *flags*.
44
|
45
| Both *t0* and *t1* may be split into little-endian ordered pairs of registers
46
- if dealing with 64-bit quantities on a 32-bit host.
47
+ if dealing with 64-bit quantities on a 32-bit host, or 128-bit quantities on
48
+ a 64-bit host.
49
|
50
| The *memidx* selects the qemu tlb index to use (e.g. user or kernel access).
51
The flags are the MemOp bits, selecting the sign, width, and endianness
52
@@ -XXX,XX +XXX,XX @@ QEMU specific operations
53
| For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
54
64-bit memory access specified in *flags*.
55
|
56
+ | For qemu_ld/st_i128, these are only supported for a 64-bit host.
57
+ |
58
| For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
59
the memory operation is known to be 8-bit. This allows the backend to
60
provide a different set of register constraints.
61
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
62
index XXXXXXX..XXXXXXX 100644
63
--- a/include/tcg/tcg-opc.h
64
+++ b/include/tcg/tcg-opc.h
65
@@ -XXX,XX +XXX,XX @@ DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
66
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
67
IMPL(TCG_TARGET_HAS_qemu_st8_i32))
68
69
+/* Only for 64-bit hosts at the moment. */
70
+DEF(qemu_ld_i128, 2, 1, 1,
71
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
72
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
73
+DEF(qemu_st_i128, 0, 3, 1,
74
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
75
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
76
+
77
/* Host vector support. */
78
79
#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
80
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
81
index XXXXXXX..XXXXXXX 100644
82
--- a/tcg/aarch64/tcg-target.h
83
+++ b/tcg/aarch64/tcg-target.h
84
@@ -XXX,XX +XXX,XX @@ extern bool have_lse2;
85
#define TCG_TARGET_HAS_muluh_i64 1
86
#define TCG_TARGET_HAS_mulsh_i64 1
87
88
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
89
+
90
#define TCG_TARGET_HAS_v64 1
91
#define TCG_TARGET_HAS_v128 1
92
#define TCG_TARGET_HAS_v256 0
93
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
94
index XXXXXXX..XXXXXXX 100644
95
--- a/tcg/arm/tcg-target.h
96
+++ b/tcg/arm/tcg-target.h
97
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
98
#define TCG_TARGET_HAS_rem_i32 0
99
#define TCG_TARGET_HAS_qemu_st8_i32 0
100
101
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
102
+
103
#define TCG_TARGET_HAS_v64 use_neon_instructions
104
#define TCG_TARGET_HAS_v128 use_neon_instructions
105
#define TCG_TARGET_HAS_v256 0
106
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
107
index XXXXXXX..XXXXXXX 100644
108
--- a/tcg/i386/tcg-target.h
109
+++ b/tcg/i386/tcg-target.h
110
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
111
#define TCG_TARGET_HAS_qemu_st8_i32 1
112
#endif
113
114
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
115
+
116
/* We do not support older SSE systems, only beginning with AVX1. */
117
#define TCG_TARGET_HAS_v64 have_avx1
118
#define TCG_TARGET_HAS_v128 have_avx1
119
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
120
index XXXXXXX..XXXXXXX 100644
121
--- a/tcg/loongarch64/tcg-target.h
122
+++ b/tcg/loongarch64/tcg-target.h
123
@@ -XXX,XX +XXX,XX @@ typedef enum {
124
#define TCG_TARGET_HAS_muls2_i64 0
125
#define TCG_TARGET_HAS_muluh_i64 1
126
#define TCG_TARGET_HAS_mulsh_i64 1
127
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
128
129
#define TCG_TARGET_DEFAULT_MO (0)
130
131
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
132
index XXXXXXX..XXXXXXX 100644
133
--- a/tcg/mips/tcg-target.h
134
+++ b/tcg/mips/tcg-target.h
135
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
136
#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
137
#endif
138
139
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
140
+
141
#define TCG_TARGET_DEFAULT_MO 0
142
#define TCG_TARGET_NEED_LDST_LABELS
143
144
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
145
index XXXXXXX..XXXXXXX 100644
146
--- a/tcg/ppc/tcg-target.h
147
+++ b/tcg/ppc/tcg-target.h
148
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
149
#define TCG_TARGET_HAS_mulsh_i64 1
150
#endif
151
152
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
153
+
154
/*
155
* While technically Altivec could support V64, it has no 64-bit store
156
* instruction and substituting two 32-bit stores makes the generated
157
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
158
index XXXXXXX..XXXXXXX 100644
159
--- a/tcg/riscv/tcg-target.h
160
+++ b/tcg/riscv/tcg-target.h
161
@@ -XXX,XX +XXX,XX @@ typedef enum {
162
#define TCG_TARGET_HAS_muluh_i64 1
163
#define TCG_TARGET_HAS_mulsh_i64 1
164
165
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
166
+
167
#define TCG_TARGET_DEFAULT_MO (0)
168
169
#define TCG_TARGET_NEED_LDST_LABELS
170
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
171
index XXXXXXX..XXXXXXX 100644
172
--- a/tcg/s390x/tcg-target.h
173
+++ b/tcg/s390x/tcg-target.h
174
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
175
#define TCG_TARGET_HAS_muluh_i64 0
176
#define TCG_TARGET_HAS_mulsh_i64 0
177
178
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
179
+
180
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
181
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
182
#define TCG_TARGET_HAS_v256 0
183
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
184
index XXXXXXX..XXXXXXX 100644
185
--- a/tcg/sparc64/tcg-target.h
186
+++ b/tcg/sparc64/tcg-target.h
187
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
188
#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions
189
#define TCG_TARGET_HAS_mulsh_i64 0
190
191
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
192
+
193
#define TCG_AREG0 TCG_REG_I0
194
195
#define TCG_TARGET_DEFAULT_MO (0)
196
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
197
index XXXXXXX..XXXXXXX 100644
198
--- a/tcg/tci/tcg-target.h
199
+++ b/tcg/tci/tcg-target.h
200
@@ -XXX,XX +XXX,XX @@
201
#define TCG_TARGET_HAS_mulu2_i32 1
202
#endif /* TCG_TARGET_REG_BITS == 64 */
203
204
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
205
+
206
/* Number of registers available. */
207
#define TCG_TARGET_NB_REGS 16
208
209
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
210
index XXXXXXX..XXXXXXX 100644
211
--- a/tcg/tcg-op.c
212
+++ b/tcg/tcg-op.c
213
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
214
215
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
216
{
217
- MemOpIdx oi = make_memop_idx(memop, idx);
218
+ const MemOpIdx oi = make_memop_idx(memop, idx);
219
220
tcg_debug_assert((memop & MO_SIZE) == MO_128);
221
tcg_debug_assert((memop & MO_SIGN) == 0);
222
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
223
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
224
addr = plugin_prep_mem_callbacks(addr);
225
226
- /* TODO: allow the tcg backend to see the whole operation. */
227
+ /* TODO: For now, force 32-bit hosts to use the helper. */
228
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
229
+ TCGv_i64 lo, hi;
230
+ TCGArg addr_arg;
231
+ MemOpIdx adj_oi;
232
+ bool need_bswap = false;
233
234
- if (use_two_i64_for_i128(memop)) {
235
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
236
+ lo = TCGV128_HIGH(val);
237
+ hi = TCGV128_LOW(val);
238
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
239
+ need_bswap = true;
240
+ } else {
241
+ lo = TCGV128_LOW(val);
242
+ hi = TCGV128_HIGH(val);
243
+ adj_oi = oi;
244
+ }
245
+
246
+#if TARGET_LONG_BITS == 32
247
+ addr_arg = tcgv_i32_arg(addr);
248
+#else
249
+ addr_arg = tcgv_i64_arg(addr);
250
+#endif
251
+ tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
252
+
253
+ if (need_bswap) {
254
+ tcg_gen_bswap64_i64(lo, lo);
255
+ tcg_gen_bswap64_i64(hi, hi);
256
+ }
257
+ } else if (use_two_i64_for_i128(memop)) {
258
MemOp mop[2];
259
TCGv addr_p8;
260
TCGv_i64 x, y;
261
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
262
263
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
264
{
265
- MemOpIdx oi = make_memop_idx(memop, idx);
266
+ const MemOpIdx oi = make_memop_idx(memop, idx);
267
268
tcg_debug_assert((memop & MO_SIZE) == MO_128);
269
tcg_debug_assert((memop & MO_SIGN) == 0);
270
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
271
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
272
addr = plugin_prep_mem_callbacks(addr);
273
274
- /* TODO: allow the tcg backend to see the whole operation. */
275
+ /* TODO: For now, force 32-bit hosts to use the helper. */
276
277
- if (use_two_i64_for_i128(memop)) {
278
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
279
+ TCGv_i64 lo, hi;
280
+ TCGArg addr_arg;
281
+ MemOpIdx adj_oi;
282
+ bool need_bswap = false;
283
+
284
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
285
+ lo = tcg_temp_new_i64();
286
+ hi = tcg_temp_new_i64();
287
+ tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
288
+ tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
289
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
290
+ need_bswap = true;
291
+ } else {
292
+ lo = TCGV128_LOW(val);
293
+ hi = TCGV128_HIGH(val);
294
+ adj_oi = oi;
295
+ }
296
+
297
+#if TARGET_LONG_BITS == 32
298
+ addr_arg = tcgv_i32_arg(addr);
299
+#else
300
+ addr_arg = tcgv_i64_arg(addr);
301
+#endif
302
+ tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
303
+
304
+ if (need_bswap) {
305
+ tcg_temp_free_i64(lo);
306
+ tcg_temp_free_i64(hi);
307
+ }
308
+ } else if (use_two_i64_for_i128(memop)) {
309
MemOp mop[2];
310
TCGv addr_p8;
311
TCGv_i64 x, y;
312
diff --git a/tcg/tcg.c b/tcg/tcg.c
313
index XXXXXXX..XXXXXXX 100644
314
--- a/tcg/tcg.c
315
+++ b/tcg/tcg.c
316
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
317
case INDEX_op_qemu_st8_i32:
318
return TCG_TARGET_HAS_qemu_st8_i32;
319
320
+ case INDEX_op_qemu_ld_i128:
321
+ case INDEX_op_qemu_st_i128:
322
+ return TCG_TARGET_HAS_qemu_ldst_i128;
323
+
324
case INDEX_op_mov_i32:
325
case INDEX_op_setcond_i32:
326
case INDEX_op_brcond_i32:
327
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
328
case INDEX_op_qemu_st8_i32:
329
case INDEX_op_qemu_ld_i64:
330
case INDEX_op_qemu_st_i64:
331
+ case INDEX_op_qemu_ld_i128:
332
+ case INDEX_op_qemu_st_i128:
333
{
334
const char *s_al, *s_op, *s_at;
335
MemOpIdx oi = op->args[k++];
336
--
337
2.34.1
338
339
New patch
1
With x86_64 as host, we do not have any temporaries with which to
2
resolve cycles, but we do have xchg. As a side bonus, the set of
3
graphs that can be made with 3 nodes and all nodes conflicting is
4
small: two. We can solve the cycle with a single temp.
1
5
6
This is required for x86_64 to handle stores of i128: 1 address
7
register and 2 data registers.
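
A minimal stand-alone sketch of the no-xchg fallback, with invented names
(the real tcg_out_movext3 also folds in the sign/zero extensions): a
three-node cycle b <- a, c <- b, a <- c is broken with one scratch value.

/* Rotate three values with a single temporary: b gets old a,
 * c gets old b, a gets old c -- the "clockwise" case. */
static void rotate3(long *a, long *b, long *c)
{
    long t = *a;   /* scratch holds the value destined for b */
    *a = *c;
    *c = *b;
    *b = t;
}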
8
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
tcg/tcg.c | 138 ++++++++++++++++++++++++++++++++++++++++++------------
13
1 file changed, 108 insertions(+), 30 deletions(-)
14
15
diff --git a/tcg/tcg.c b/tcg/tcg.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/tcg.c
18
+++ b/tcg/tcg.c
19
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
20
tcg_out_movext1_new_src(s, i1, src1);
21
}
22
23
+/**
24
+ * tcg_out_movext3 -- move and extend three pair
25
+ * @s: tcg context
26
+ * @i1: first move description
27
+ * @i2: second move description
28
+ * @i3: third move description
29
+ * @scratch: temporary register, or -1 for none
30
+ *
31
+ * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
32
+ * between the sources and destinations.
33
+ */
34
+
35
+static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
36
+ const TCGMovExtend *i2, const TCGMovExtend *i3,
37
+ int scratch)
38
+{
39
+ TCGReg src1 = i1->src;
40
+ TCGReg src2 = i2->src;
41
+ TCGReg src3 = i3->src;
42
+
43
+ if (i1->dst != src2 && i1->dst != src3) {
44
+ tcg_out_movext1(s, i1);
45
+ tcg_out_movext2(s, i2, i3, scratch);
46
+ return;
47
+ }
48
+ if (i2->dst != src1 && i2->dst != src3) {
49
+ tcg_out_movext1(s, i2);
50
+ tcg_out_movext2(s, i1, i3, scratch);
51
+ return;
52
+ }
53
+ if (i3->dst != src1 && i3->dst != src2) {
54
+ tcg_out_movext1(s, i3);
55
+ tcg_out_movext2(s, i1, i2, scratch);
56
+ return;
57
+ }
58
+
59
+ /*
60
+ * There is a cycle. Since there are only 3 nodes, the cycle is
61
+ * either "clockwise" or "anti-clockwise", and can be solved with
62
+ * a single scratch or two xchg.
63
+ */
64
+ if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
65
+ /* "Clockwise" */
66
+ if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
67
+ tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
68
+ /* The data is now in the correct registers, now extend. */
69
+ tcg_out_movext1_new_src(s, i1, i1->dst);
70
+ tcg_out_movext1_new_src(s, i2, i2->dst);
71
+ tcg_out_movext1_new_src(s, i3, i3->dst);
72
+ } else {
73
+ tcg_debug_assert(scratch >= 0);
74
+ tcg_out_mov(s, i1->src_type, scratch, src1);
75
+ tcg_out_movext1(s, i3);
76
+ tcg_out_movext1(s, i2);
77
+ tcg_out_movext1_new_src(s, i1, scratch);
78
+ }
79
+ } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
80
+ /* "Anti-clockwise" */
81
+ if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
82
+ tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
83
+ /* The data is now in the correct registers, now extend. */
84
+ tcg_out_movext1_new_src(s, i1, i1->dst);
85
+ tcg_out_movext1_new_src(s, i2, i2->dst);
86
+ tcg_out_movext1_new_src(s, i3, i3->dst);
87
+ } else {
88
+ tcg_debug_assert(scratch >= 0);
89
+ tcg_out_mov(s, i1->src_type, scratch, src1);
90
+ tcg_out_movext1(s, i2);
91
+ tcg_out_movext1(s, i3);
92
+ tcg_out_movext1_new_src(s, i1, scratch);
93
+ }
94
+ } else {
95
+ g_assert_not_reached();
96
+ }
97
+}
98
+
99
#define C_PFX1(P, A) P##A
100
#define C_PFX2(P, A, B) P##A##_##B
101
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
102
@@ -XXX,XX +XXX,XX @@ static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
103
104
static void tcg_out_helper_load_regs(TCGContext *s,
105
unsigned nmov, TCGMovExtend *mov,
106
- unsigned ntmp, const int *tmp)
107
+ const TCGLdstHelperParam *parm)
108
{
109
+ TCGReg dst3;
110
+
111
switch (nmov) {
112
- default:
113
+ case 4:
114
/* The backend must have provided enough temps for the worst case. */
115
- tcg_debug_assert(ntmp + 1 >= nmov);
116
+ tcg_debug_assert(parm->ntmp >= 2);
117
118
- for (unsigned i = nmov - 1; i >= 2; --i) {
119
- TCGReg dst = mov[i].dst;
120
+ dst3 = mov[3].dst;
121
+ for (unsigned j = 0; j < 3; ++j) {
122
+ if (dst3 == mov[j].src) {
123
+ /*
124
+ * Conflict. Copy the source to a temporary, perform the
125
+ * remaining moves, then the extension from our scratch
126
+ * on the way out.
127
+ */
128
+ TCGReg scratch = parm->tmp[1];
129
130
- for (unsigned j = 0; j < i; ++j) {
131
- if (dst == mov[j].src) {
132
- /*
133
- * Conflict.
134
- * Copy the source to a temporary, recurse for the
135
- * remaining moves, perform the extension from our
136
- * scratch on the way out.
137
- */
138
- TCGReg scratch = tmp[--ntmp];
139
- tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
140
- mov[i].src = scratch;
141
-
142
- tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
143
- tcg_out_movext1(s, &mov[i]);
144
- return;
145
- }
146
+ tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
147
+ tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
148
+ tcg_out_movext1_new_src(s, &mov[3], scratch);
149
+ break;
150
}
151
-
152
- /* No conflicts: perform this move and continue. */
153
- tcg_out_movext1(s, &mov[i]);
154
}
155
- /* fall through for the final two moves */
156
157
+ /* No conflicts: perform this move and continue. */
158
+ tcg_out_movext1(s, &mov[3]);
159
+ /* fall through */
160
+
161
+ case 3:
162
+ tcg_out_movext3(s, mov, mov + 1, mov + 2,
163
+ parm->ntmp ? parm->tmp[0] : -1);
164
+ break;
165
case 2:
166
- tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
167
- return;
168
+ tcg_out_movext2(s, mov, mov + 1,
169
+ parm->ntmp ? parm->tmp[0] : -1);
170
+ break;
171
case 1:
172
tcg_out_movext1(s, mov);
173
- return;
174
- case 0:
175
+ break;
176
+ default:
177
g_assert_not_reached();
178
}
179
}
180
@@ -XXX,XX +XXX,XX @@ static void tcg_out_helper_load_slots(TCGContext *s,
181
for (i = 0; i < nmov; ++i) {
182
mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
183
}
184
- tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
185
+ tcg_out_helper_load_regs(s, nmov, mov, parm);
186
}
187
188
static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
189
--
190
2.34.1
New patch
1
Now that tcg_out_helper_load_regs is not recursive, we can
2
merge it into its only caller, tcg_out_helper_load_slots.
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/tcg.c | 89 +++++++++++++++++++++++++------------------------------
8
1 file changed, 41 insertions(+), 48 deletions(-)
9
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg.c
13
+++ b/tcg/tcg.c
14
@@ -XXX,XX +XXX,XX @@ static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
15
return ofs;
16
}
17
18
-static void tcg_out_helper_load_regs(TCGContext *s,
19
- unsigned nmov, TCGMovExtend *mov,
20
- const TCGLdstHelperParam *parm)
21
+static void tcg_out_helper_load_slots(TCGContext *s,
22
+ unsigned nmov, TCGMovExtend *mov,
23
+ const TCGLdstHelperParam *parm)
24
{
25
+ unsigned i;
26
TCGReg dst3;
27
28
+ /*
29
+ * Start from the end, storing to the stack first.
30
+ * This frees those registers, so we need not consider overlap.
31
+ */
32
+ for (i = nmov; i-- > 0; ) {
33
+ unsigned slot = mov[i].dst;
34
+
35
+ if (arg_slot_reg_p(slot)) {
36
+ goto found_reg;
37
+ }
38
+
39
+ TCGReg src = mov[i].src;
40
+ TCGType dst_type = mov[i].dst_type;
41
+ MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
42
+
43
+ /* The argument is going onto the stack; extend into scratch. */
44
+ if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
45
+ tcg_debug_assert(parm->ntmp != 0);
46
+ mov[i].dst = src = parm->tmp[0];
47
+ tcg_out_movext1(s, &mov[i]);
48
+ }
49
+
50
+ tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
51
+ tcg_out_helper_stk_ofs(dst_type, slot));
52
+ }
53
+ return;
54
+
55
+ found_reg:
56
+ /*
57
+ * The remaining arguments are in registers.
58
+ * Convert slot numbers to argument registers.
59
+ */
60
+ nmov = i + 1;
61
+ for (i = 0; i < nmov; ++i) {
62
+ mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
63
+ }
64
+
65
switch (nmov) {
66
case 4:
67
/* The backend must have provided enough temps for the worst case. */
68
@@ -XXX,XX +XXX,XX @@ static void tcg_out_helper_load_regs(TCGContext *s,
69
}
70
}
71
72
-static void tcg_out_helper_load_slots(TCGContext *s,
73
- unsigned nmov, TCGMovExtend *mov,
74
- const TCGLdstHelperParam *parm)
75
-{
76
- unsigned i;
77
-
78
- /*
79
- * Start from the end, storing to the stack first.
80
- * This frees those registers, so we need not consider overlap.
81
- */
82
- for (i = nmov; i-- > 0; ) {
83
- unsigned slot = mov[i].dst;
84
-
85
- if (arg_slot_reg_p(slot)) {
86
- goto found_reg;
87
- }
88
-
89
- TCGReg src = mov[i].src;
90
- TCGType dst_type = mov[i].dst_type;
91
- MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
92
-
93
- /* The argument is going onto the stack; extend into scratch. */
94
- if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
95
- tcg_debug_assert(parm->ntmp != 0);
96
- mov[i].dst = src = parm->tmp[0];
97
- tcg_out_movext1(s, &mov[i]);
98
- }
99
-
100
- tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
101
- tcg_out_helper_stk_ofs(dst_type, slot));
102
- }
103
- return;
104
-
105
- found_reg:
106
- /*
107
- * The remaining arguments are in registers.
108
- * Convert slot numbers to argument registers.
109
- */
110
- nmov = i + 1;
111
- for (i = 0; i < nmov; ++i) {
112
- mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
113
- }
114
- tcg_out_helper_load_regs(s, nmov, mov, parm);
115
-}
116
-
117
static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
118
TCGType type, tcg_target_long imm,
119
const TCGLdstHelperParam *parm)
120
--
121
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/tcg.c | 196 +++++++++++++++++++++++++++++++++++++++++++++---------
5
1 file changed, 163 insertions(+), 33 deletions(-)
1
6
7
diff --git a/tcg/tcg.c b/tcg/tcg.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/tcg.c
10
+++ b/tcg/tcg.c
11
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
12
[MO_UQ] = helper_ldq_mmu,
13
#if TCG_TARGET_REG_BITS == 64
14
[MO_SL] = helper_ldsl_mmu,
15
+ [MO_128] = helper_ld16_mmu,
16
#endif
17
};
18
19
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
20
[MO_16] = helper_stw_mmu,
21
[MO_32] = helper_stl_mmu,
22
[MO_64] = helper_stq_mmu,
23
+#if TCG_TARGET_REG_BITS == 64
24
+ [MO_128] = helper_st16_mmu,
25
+#endif
26
};
27
28
TCGContext tcg_init_ctx;
29
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld64_mmu = {
30
| dh_typemask(ptr, 4) /* uintptr_t ra */
31
};
32
33
+static TCGHelperInfo info_helper_ld128_mmu = {
34
+ .flags = TCG_CALL_NO_WG,
35
+ .typemask = dh_typemask(i128, 0) /* return Int128 */
36
+ | dh_typemask(env, 1)
37
+ | dh_typemask(tl, 2) /* target_ulong addr */
38
+ | dh_typemask(i32, 3) /* unsigned oi */
39
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
40
+};
41
+
42
static TCGHelperInfo info_helper_st32_mmu = {
43
.flags = TCG_CALL_NO_WG,
44
.typemask = dh_typemask(void, 0)
45
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st64_mmu = {
46
| dh_typemask(ptr, 5) /* uintptr_t ra */
47
};
48
49
+static TCGHelperInfo info_helper_st128_mmu = {
50
+ .flags = TCG_CALL_NO_WG,
51
+ .typemask = dh_typemask(void, 0)
52
+ | dh_typemask(env, 1)
53
+ | dh_typemask(tl, 2) /* target_ulong addr */
54
+ | dh_typemask(i128, 3) /* Int128 data */
55
+ | dh_typemask(i32, 4) /* unsigned oi */
56
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
57
+};
58
+
59
#ifdef CONFIG_TCG_INTERPRETER
60
static ffi_type *typecode_to_ffi(int argmask)
61
{
62
@@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus)
63
64
init_call_layout(&info_helper_ld32_mmu);
65
init_call_layout(&info_helper_ld64_mmu);
66
+ init_call_layout(&info_helper_ld128_mmu);
67
init_call_layout(&info_helper_st32_mmu);
68
init_call_layout(&info_helper_st64_mmu);
69
+ init_call_layout(&info_helper_st128_mmu);
70
71
#ifdef CONFIG_TCG_INTERPRETER
72
init_ffi_layouts();
73
@@ -XXX,XX +XXX,XX @@ static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
74
TCGType dst_type, TCGType src_type,
75
TCGReg lo, TCGReg hi)
76
{
77
+ MemOp reg_mo;
78
+
79
if (dst_type <= TCG_TYPE_REG) {
80
MemOp src_ext;
81
82
@@ -XXX,XX +XXX,XX @@ static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
83
return 1;
84
}
85
86
- assert(TCG_TARGET_REG_BITS == 32);
87
+ if (TCG_TARGET_REG_BITS == 32) {
88
+ assert(dst_type == TCG_TYPE_I64);
89
+ reg_mo = MO_32;
90
+ } else {
91
+ assert(dst_type == TCG_TYPE_I128);
92
+ reg_mo = MO_64;
93
+ }
94
95
mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
96
mov[0].src = lo;
97
- mov[0].dst_type = TCG_TYPE_I32;
98
- mov[0].src_type = TCG_TYPE_I32;
99
- mov[0].src_ext = MO_32;
100
+ mov[0].dst_type = TCG_TYPE_REG;
101
+ mov[0].src_type = TCG_TYPE_REG;
102
+ mov[0].src_ext = reg_mo;
103
104
mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
105
mov[1].src = hi;
106
- mov[1].dst_type = TCG_TYPE_I32;
107
- mov[1].src_type = TCG_TYPE_I32;
108
- mov[1].src_ext = MO_32;
109
+ mov[1].dst_type = TCG_TYPE_REG;
110
+ mov[1].src_type = TCG_TYPE_REG;
111
+ mov[1].src_ext = reg_mo;
112
113
return 2;
114
}
115
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
116
case MO_64:
117
info = &info_helper_ld64_mmu;
118
break;
119
+ case MO_128:
120
+ info = &info_helper_ld128_mmu;
121
+ break;
122
default:
123
g_assert_not_reached();
124
}
125
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
126
127
tcg_out_helper_load_slots(s, nmov, mov, parm);
128
129
- /* No special attention for 32 and 64-bit return values. */
130
- tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
131
+ switch (info->out_kind) {
132
+ case TCG_CALL_RET_NORMAL:
133
+ case TCG_CALL_RET_BY_VEC:
134
+ break;
135
+ case TCG_CALL_RET_BY_REF:
136
+ /*
137
+ * The return reference is in the first argument slot.
138
+ * We need memory in which to return: re-use the top of stack.
139
+ */
140
+ {
141
+ int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
142
+
143
+ if (arg_slot_reg_p(0)) {
144
+ tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
145
+ TCG_REG_CALL_STACK, ofs_slot0);
146
+ } else {
147
+ tcg_debug_assert(parm->ntmp != 0);
148
+ tcg_out_addi_ptr(s, parm->tmp[0],
149
+ TCG_REG_CALL_STACK, ofs_slot0);
150
+ tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
151
+ TCG_REG_CALL_STACK, ofs_slot0);
152
+ }
153
+ }
154
+ break;
155
+ default:
156
+ g_assert_not_reached();
157
+ }
158
159
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
160
}
161
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
162
bool load_sign,
163
const TCGLdstHelperParam *parm)
164
{
165
+ MemOp mop = get_memop(ldst->oi);
166
TCGMovExtend mov[2];
167
+ int ofs_slot0;
168
169
- if (ldst->type <= TCG_TYPE_REG) {
170
- MemOp mop = get_memop(ldst->oi);
171
+ switch (ldst->type) {
172
+ case TCG_TYPE_I64:
173
+ if (TCG_TARGET_REG_BITS == 32) {
174
+ break;
175
+ }
176
+ /* fall through */
177
178
+ case TCG_TYPE_I32:
179
mov[0].dst = ldst->datalo_reg;
180
mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
181
mov[0].dst_type = ldst->type;
182
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
183
mov[0].src_ext = mop & MO_SSIZE;
184
}
185
tcg_out_movext1(s, mov);
186
- } else {
187
- assert(TCG_TARGET_REG_BITS == 32);
188
+ return;
189
190
- mov[0].dst = ldst->datalo_reg;
191
- mov[0].src =
192
- tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
193
- mov[0].dst_type = TCG_TYPE_I32;
194
- mov[0].src_type = TCG_TYPE_I32;
195
- mov[0].src_ext = MO_32;
196
+ case TCG_TYPE_I128:
197
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
198
+ ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
199
+ switch (TCG_TARGET_CALL_RET_I128) {
200
+ case TCG_CALL_RET_NORMAL:
201
+ break;
202
+ case TCG_CALL_RET_BY_VEC:
203
+ tcg_out_st(s, TCG_TYPE_V128,
204
+ tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
205
+ TCG_REG_CALL_STACK, ofs_slot0);
206
+ /* fall through */
207
+ case TCG_CALL_RET_BY_REF:
208
+ tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
209
+ TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
210
+ tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
211
+ TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
212
+ return;
213
+ default:
214
+ g_assert_not_reached();
215
+ }
216
+ break;
217
218
- mov[1].dst = ldst->datahi_reg;
219
- mov[1].src =
220
- tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
221
- mov[1].dst_type = TCG_TYPE_REG;
222
- mov[1].src_type = TCG_TYPE_REG;
223
- mov[1].src_ext = MO_32;
224
-
225
- tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
226
+ default:
227
+ g_assert_not_reached();
228
}
229
+
230
+ mov[0].dst = ldst->datalo_reg;
231
+ mov[0].src =
232
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
233
+ mov[0].dst_type = TCG_TYPE_I32;
234
+ mov[0].src_type = TCG_TYPE_I32;
235
+ mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
236
+
237
+ mov[1].dst = ldst->datahi_reg;
238
+ mov[1].src =
239
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
240
+ mov[1].dst_type = TCG_TYPE_REG;
241
+ mov[1].src_type = TCG_TYPE_REG;
242
+ mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
243
+
244
+ tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
245
}
246
247
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
248
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
249
info = &info_helper_st64_mmu;
250
data_type = TCG_TYPE_I64;
251
break;
252
+ case MO_128:
253
+ info = &info_helper_st128_mmu;
254
+ data_type = TCG_TYPE_I128;
255
+ break;
256
default:
257
g_assert_not_reached();
258
}
259
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
260
261
/* Handle data argument. */
262
loc = &info->in[next_arg];
263
- n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
264
- ldst->datalo_reg, ldst->datahi_reg);
265
- next_arg += n;
266
- nmov += n;
267
- tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
268
+ switch (loc->kind) {
269
+ case TCG_CALL_ARG_NORMAL:
270
+ case TCG_CALL_ARG_EXTEND_U:
271
+ case TCG_CALL_ARG_EXTEND_S:
272
+ n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
273
+ ldst->datalo_reg, ldst->datahi_reg);
274
+ next_arg += n;
275
+ nmov += n;
276
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
277
+ break;
278
+
279
+ case TCG_CALL_ARG_BY_REF:
280
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
281
+ tcg_debug_assert(data_type == TCG_TYPE_I128);
282
+ tcg_out_st(s, TCG_TYPE_I64,
283
+ HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
284
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
285
+ tcg_out_st(s, TCG_TYPE_I64,
286
+ HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
287
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
288
+
289
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
290
+
291
+ if (arg_slot_reg_p(loc->arg_slot)) {
292
+ tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
293
+ TCG_REG_CALL_STACK,
294
+ arg_slot_stk_ofs(loc->ref_slot));
295
+ } else {
296
+ tcg_debug_assert(parm->ntmp != 0);
297
+ tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
298
+ arg_slot_stk_ofs(loc->ref_slot));
299
+ tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
300
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
301
+ }
302
+ next_arg += 2;
303
+ break;
304
+
305
+ default:
306
+ g_assert_not_reached();
307
+ }
308
309
- tcg_out_helper_load_slots(s, nmov, mov, parm);
310
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
311
}
312
313
--
314
2.34.1
New patch
1
Examine MemOp for atomicity and alignment, adjusting alignment
2
as required to implement atomicity on the host.
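A hedged usage sketch, not part of the patch: how a backend might consume
the result, assuming the host only guarantees atomicity for naturally
aligned accesses (MO_ATOM_IFALIGN) and is prepared to issue a pair of
8-byte operations for a 16-byte access; s and opc stand for the TCGContext
and MemOp already in scope in a backend's prepare_host_addr.

    /* Sketch only: mirrors how later patches in this series use it. */
    TCGAtomAlign aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, true);
    unsigned a_mask = (1u << aa.align) - 1;       /* alignment to test inline */
    bool need_16byte_insn = (aa.atom == MO_128);  /* else two 8-byte ops do   */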
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/tcg.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
8
1 file changed, 95 insertions(+)
9
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg.c
13
+++ b/tcg/tcg.c
14
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
15
#endif
16
};
17
18
+typedef struct {
19
+ MemOp atom; /* lg2 bits of atomicity required */
20
+ MemOp align; /* lg2 bits of alignment to use */
21
+} TCGAtomAlign;
22
+
23
+static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
24
+ MemOp host_atom, bool allow_two_ops)
25
+ __attribute__((unused));
26
+
27
TCGContext tcg_init_ctx;
28
__thread TCGContext *tcg_ctx;
29
30
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
31
}
32
}
33
34
+/**
35
+ * atom_and_align_for_opc:
36
+ * @s: tcg context
37
+ * @opc: memory operation code
38
+ * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
39
+ * @allow_two_ops: true if we are prepared to issue two operations
40
+ *
41
+ * Return the alignment and atomicity to use for the inline fast path
42
+ * for the given memory operation. The alignment may be larger than
43
+ * that specified in @opc, and the correct alignment will be diagnosed
44
+ * by the slow path helper.
45
+ *
46
+ * If @allow_two_ops, the host is prepared to test for 2x alignment,
47
+ * and issue two loads or stores for subalignment.
48
+ */
49
+static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
50
+ MemOp host_atom, bool allow_two_ops)
51
+{
52
+ MemOp align = get_alignment_bits(opc);
53
+ MemOp size = opc & MO_SIZE;
54
+ MemOp half = size ? size - 1 : 0;
55
+ MemOp atmax;
56
+ MemOp atom;
57
+
58
+ /* When serialized, no further atomicity required. */
59
+ if (s->gen_tb->cflags & CF_PARALLEL) {
60
+ atom = opc & MO_ATOM_MASK;
61
+ } else {
62
+ atom = MO_ATOM_NONE;
63
+ }
64
+
65
+ switch (atom) {
66
+ case MO_ATOM_NONE:
67
+ /* The operation requires no specific atomicity. */
68
+ atmax = MO_8;
69
+ break;
70
+
71
+ case MO_ATOM_IFALIGN:
72
+ atmax = size;
73
+ break;
74
+
75
+ case MO_ATOM_IFALIGN_PAIR:
76
+ atmax = half;
77
+ break;
78
+
79
+ case MO_ATOM_WITHIN16:
80
+ atmax = size;
81
+ if (size == MO_128) {
82
+ /* Misalignment implies !within16, and therefore no atomicity. */
83
+ } else if (host_atom != MO_ATOM_WITHIN16) {
84
+ /* The host does not implement within16, so require alignment. */
85
+ align = MAX(align, size);
86
+ }
87
+ break;
88
+
89
+ case MO_ATOM_WITHIN16_PAIR:
90
+ atmax = size;
91
+ /*
92
+ * Misalignment implies !within16, and therefore half atomicity.
93
+ * Any host prepared for two operations can implement this with
94
+ * half alignment.
95
+ */
96
+ if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
97
+ align = MAX(align, half);
98
+ }
99
+ break;
100
+
101
+ case MO_ATOM_SUBALIGN:
102
+ atmax = size;
103
+ if (host_atom != MO_ATOM_SUBALIGN) {
104
+ /* If unaligned but not odd, there are subobjects up to half. */
105
+ if (allow_two_ops) {
106
+ align = MAX(align, half);
107
+ } else {
108
+ align = MAX(align, size);
109
+ }
110
+ }
111
+ break;
112
+
113
+ default:
114
+ g_assert_not_reached();
115
+ }
116
+
117
+ return (TCGAtomAlign){ .atom = atmax, .align = align };
118
+}
119
+
120
/*
121
* Similarly for qemu_ld/st slow path helpers.
122
* We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
123
--
124
2.34.1
New patch
1
No change to the ultimate load/store routines yet, so some atomicity
2
conditions are not yet honored, but this plumbs the alignment change through
3
the relevant functions.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 27 +++++++++++++++------------
9
1 file changed, 15 insertions(+), 12 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
int index;
17
int ofs;
18
int seg;
19
+ TCGAtomAlign aa;
20
} HostAddress;
21
22
bool tcg_target_has_memory_bswap(MemOp memop)
23
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
24
{
25
TCGLabelQemuLdst *ldst = NULL;
26
MemOp opc = get_memop(oi);
27
- unsigned a_bits = get_alignment_bits(opc);
28
- unsigned a_mask = (1 << a_bits) - 1;
29
+ unsigned a_mask;
30
+
31
+#ifdef CONFIG_SOFTMMU
32
+ h->index = TCG_REG_L0;
33
+ h->ofs = 0;
34
+ h->seg = 0;
35
+#else
36
+ *h = x86_guest_base;
37
+#endif
38
+ h->base = addrlo;
39
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
40
+ a_mask = (1 << h->aa.align) - 1;
41
42
#ifdef CONFIG_SOFTMMU
43
int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
44
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
45
* copy the address and mask. For lesser alignments, check that we don't
46
* cross pages for the complete access.
47
*/
48
- if (a_bits >= s_bits) {
49
+ if (a_mask >= s_mask) {
50
tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
51
} else {
52
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
53
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
54
/* TLB Hit. */
55
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
56
offsetof(CPUTLBEntry, addend));
57
-
58
- *h = (HostAddress) {
59
- .base = addrlo,
60
- .index = TCG_REG_L0,
61
- };
62
#else
63
- if (a_bits) {
64
+ if (a_mask) {
65
ldst = new_ldst_label(s);
66
67
ldst->is_ld = is_ld;
68
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
69
ldst->label_ptr[0] = s->code_ptr;
70
s->code_ptr += 4;
71
}
72
-
73
- *h = x86_guest_base;
74
- h->base = addrlo;
75
#endif
76
77
return ldst;
78
--
79
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/aarch64/tcg-target.c.inc | 36 ++++++++++++++++++------------------
5
1 file changed, 18 insertions(+), 18 deletions(-)
1
6
7
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/aarch64/tcg-target.c.inc
10
+++ b/tcg/aarch64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ typedef struct {
12
TCGReg base;
13
TCGReg index;
14
TCGType index_ext;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
- unsigned a_mask = (1u << a_bits) - 1;
25
+ unsigned a_mask;
26
+
27
+ h->aa = atom_and_align_for_opc(s, opc,
28
+ have_lse2 ? MO_ATOM_WITHIN16
29
+ : MO_ATOM_IFALIGN,
30
+ false);
31
+ a_mask = (1 << h->aa.align) - 1;
32
33
#ifdef CONFIG_SOFTMMU
34
unsigned s_bits = opc & MO_SIZE;
35
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
36
* bits within the address. For unaligned access, we check that we don't
37
* cross pages using the address of the last byte of the access.
38
*/
39
- if (a_bits >= s_bits) {
40
+ if (a_mask >= s_mask) {
41
x3 = addr_reg;
42
} else {
43
tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
44
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
45
ldst->label_ptr[0] = s->code_ptr;
46
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
47
48
- *h = (HostAddress){
49
- .base = TCG_REG_X1,
50
- .index = addr_reg,
51
- .index_ext = addr_type
52
- };
53
+ h->base = TCG_REG_X1;
54
+ h->index = addr_reg;
55
+ h->index_ext = addr_type;
56
#else
57
if (a_mask) {
58
ldst = new_ldst_label(s);
59
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
60
}
61
62
if (USE_GUEST_BASE) {
63
- *h = (HostAddress){
64
- .base = TCG_REG_GUEST_BASE,
65
- .index = addr_reg,
66
- .index_ext = addr_type
67
- };
68
+ h->base = TCG_REG_GUEST_BASE;
69
+ h->index = addr_reg;
70
+ h->index_ext = addr_type;
71
} else {
72
- *h = (HostAddress){
73
- .base = addr_reg,
74
- .index = TCG_REG_XZR,
75
- .index_ext = TCG_TYPE_I64
76
- };
77
+ h->base = addr_reg;
78
+ h->index = TCG_REG_XZR;
79
+ h->index_ext = TCG_TYPE_I64;
80
}
81
#endif
82
83
--
84
2.34.1
New patch
1
No change to the ultimate load/store routines yet, so some atomicity
2
conditions are not yet honored, but this plumbs the alignment change through
3
the relevant functions.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/arm/tcg-target.c.inc | 39 ++++++++++++++++++++++-----------------
9
1 file changed, 22 insertions(+), 17 deletions(-)
10
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/arm/tcg-target.c.inc
14
+++ b/tcg/arm/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
TCGReg base;
17
int index;
18
bool index_scratch;
19
+ TCGAtomAlign aa;
20
} HostAddress;
21
22
bool tcg_target_has_memory_bswap(MemOp memop)
23
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
24
{
25
TCGLabelQemuLdst *ldst = NULL;
26
MemOp opc = get_memop(oi);
27
- MemOp a_bits = get_alignment_bits(opc);
28
- unsigned a_mask = (1 << a_bits) - 1;
29
+ unsigned a_mask;
30
+
31
+#ifdef CONFIG_SOFTMMU
32
+ *h = (HostAddress){
33
+ .cond = COND_AL,
34
+ .base = addrlo,
35
+ .index = TCG_REG_R1,
36
+ .index_scratch = true,
37
+ };
38
+#else
39
+ *h = (HostAddress){
40
+ .cond = COND_AL,
41
+ .base = addrlo,
42
+ .index = guest_base ? TCG_REG_GUEST_BASE : -1,
43
+ .index_scratch = false,
44
+ };
45
+#endif
46
+
47
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
48
+ a_mask = (1 << h->aa.align) - 1;
49
50
#ifdef CONFIG_SOFTMMU
51
int mem_index = get_mmuidx(oi);
52
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
53
if (TARGET_LONG_BITS == 64) {
54
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
55
}
56
-
57
- *h = (HostAddress){
58
- .cond = COND_AL,
59
- .base = addrlo,
60
- .index = TCG_REG_R1,
61
- .index_scratch = true,
62
- };
63
#else
64
if (a_mask) {
65
ldst = new_ldst_label(s);
66
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
67
ldst->addrlo_reg = addrlo;
68
ldst->addrhi_reg = addrhi;
69
70
- /* We are expecting a_bits to max out at 7 */
71
+ /* We are expecting alignment to max out at 7 */
72
tcg_debug_assert(a_mask <= 0xff);
73
/* tst addr, #mask */
74
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
75
}
76
-
77
- *h = (HostAddress){
78
- .cond = COND_AL,
79
- .base = addrlo,
80
- .index = guest_base ? TCG_REG_GUEST_BASE : -1,
81
- .index_scratch = false,
82
- };
83
#endif
84
85
return ldst;
86
--
87
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/loongarch64/tcg-target.c.inc | 6 +++++-
5
1 file changed, 5 insertions(+), 1 deletion(-)
1
6
7
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/loongarch64/tcg-target.c.inc
10
+++ b/tcg/loongarch64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
12
typedef struct {
13
TCGReg base;
14
TCGReg index;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
+ MemOp a_bits;
25
+
26
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
27
+ a_bits = h->aa.align;
28
29
#ifdef CONFIG_SOFTMMU
30
unsigned s_bits = opc & MO_SIZE;
31
--
32
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/mips/tcg-target.c.inc | 15 +++++++++------
5
1 file changed, 9 insertions(+), 6 deletions(-)
1
6
7
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/mips/tcg-target.c.inc
10
+++ b/tcg/mips/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
12
13
typedef struct {
14
TCGReg base;
15
- MemOp align;
16
+ TCGAtomAlign aa;
17
} HostAddress;
18
19
bool tcg_target_has_memory_bswap(MemOp memop)
20
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
21
{
22
TCGLabelQemuLdst *ldst = NULL;
23
MemOp opc = get_memop(oi);
24
- unsigned a_bits = get_alignment_bits(opc);
25
+ MemOp a_bits;
26
unsigned s_bits = opc & MO_SIZE;
27
- unsigned a_mask = (1 << a_bits) - 1;
28
+ unsigned a_mask;
29
TCGReg base;
30
31
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
32
+ a_bits = h->aa.align;
33
+ a_mask = (1 << a_bits) - 1;
34
+
35
#ifdef CONFIG_SOFTMMU
36
unsigned s_mask = (1 << s_bits) - 1;
37
int mem_index = get_mmuidx(oi);
38
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
39
#endif
40
41
h->base = base;
42
- h->align = a_bits;
43
return ldst;
44
}
45
46
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
47
48
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
49
50
- if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
51
+ if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) {
52
tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type);
53
} else {
54
tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type);
55
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
56
57
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
58
59
- if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
60
+ if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) {
61
tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc);
62
} else {
63
tcg_out_qemu_st_unalign(s, datalo, datahi, h.base, opc);
64
--
65
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/ppc/tcg-target.c.inc | 19 ++++++++++++++++++-
5
1 file changed, 18 insertions(+), 1 deletion(-)
1
6
7
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/ppc/tcg-target.c.inc
10
+++ b/tcg/ppc/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
12
typedef struct {
13
TCGReg base;
14
TCGReg index;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
+ MemOp a_bits;
25
+
26
+ /*
27
+ * Book II, Section 1.4, Single-Copy Atomicity, specifies:
28
+ *
29
+ * Before 3.0, "An access that is not atomic is performed as a set of
30
+ * smaller disjoint atomic accesses. In general, the number and alignment
31
+ * of these accesses are implementation-dependent." Thus MO_ATOM_IFALIGN.
32
+ *
33
+ * As of 3.0, "the non-atomic access is performed as described in
34
+ * the corresponding list", which matches MO_ATOM_SUBALIGN.
35
+ */
36
+ h->aa = atom_and_align_for_opc(s, opc,
37
+ have_isa_3_00 ? MO_ATOM_SUBALIGN
38
+ : MO_ATOM_IFALIGN,
39
+ false);
40
+ a_bits = h->aa.align;
41
42
#ifdef CONFIG_SOFTMMU
43
int mem_index = get_mmuidx(oi);
44
--
45
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/riscv/tcg-target.c.inc | 13 ++++++++-----
5
1 file changed, 8 insertions(+), 5 deletions(-)
1
6
7
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/riscv/tcg-target.c.inc
10
+++ b/tcg/riscv/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
12
{
13
TCGLabelQemuLdst *ldst = NULL;
14
MemOp opc = get_memop(oi);
15
- unsigned a_bits = get_alignment_bits(opc);
16
- unsigned a_mask = (1u << a_bits) - 1;
17
+ TCGAtomAlign aa;
18
+ unsigned a_mask;
19
+
20
+ aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
21
+ a_mask = (1u << aa.align) - 1;
22
23
#ifdef CONFIG_SOFTMMU
24
unsigned s_bits = opc & MO_SIZE;
25
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
26
* cross pages using the address of the last byte of the access.
27
*/
28
addr_adj = addr_reg;
29
- if (a_bits < s_bits) {
30
+ if (a_mask < s_mask) {
31
addr_adj = TCG_REG_TMP0;
32
tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
33
addr_adj, addr_reg, s_mask - a_mask);
34
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
35
ldst->oi = oi;
36
ldst->addrlo_reg = addr_reg;
37
38
- /* We are expecting a_bits max 7, so we can always use andi. */
39
- tcg_debug_assert(a_bits < 12);
40
+ /* We are expecting alignment max 7, so we can always use andi. */
41
+ tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
42
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
43
44
ldst->label_ptr[0] = s->code_ptr;
45
--
46
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/s390x/tcg-target.c.inc | 11 +++++++----
5
1 file changed, 7 insertions(+), 4 deletions(-)
1
6
7
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/s390x/tcg-target.c.inc
10
+++ b/tcg/s390x/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ typedef struct {
12
TCGReg base;
13
TCGReg index;
14
int disp;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
- unsigned a_mask = (1u << a_bits) - 1;
25
+ unsigned a_mask;
26
+
27
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
28
+ a_mask = (1 << h->aa.align) - 1;
29
30
#ifdef CONFIG_SOFTMMU
31
unsigned s_bits = opc & MO_SIZE;
32
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
33
* bits within the address. For unaligned access, we check that we don't
34
* cross pages using the address of the last byte of the access.
35
*/
36
- a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
37
+ a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
38
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
39
if (a_off == 0) {
40
tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
41
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
42
ldst->addrlo_reg = addr_reg;
43
44
/* We are expecting a_bits to max out at 7, much lower than TMLL. */
45
- tcg_debug_assert(a_bits < 16);
46
+ tcg_debug_assert(a_mask <= 0xffff);
47
tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
48
49
tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
50
--
51
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/sparc64/tcg-target.c.inc | 21 ++++++++++++---------
5
1 file changed, 12 insertions(+), 9 deletions(-)
1
6
7
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/sparc64/tcg-target.c.inc
10
+++ b/tcg/sparc64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
12
typedef struct {
13
TCGReg base;
14
TCGReg index;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
- unsigned s_bits = opc & MO_SIZE;
25
+ MemOp s_bits = opc & MO_SIZE;
26
unsigned a_mask;
27
28
/* We don't support unaligned accesses. */
29
- a_bits = MAX(a_bits, s_bits);
30
- a_mask = (1u << a_bits) - 1;
31
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
32
+ h->aa.align = MAX(h->aa.align, s_bits);
33
+ a_mask = (1u << h->aa.align) - 1;
34
35
#ifdef CONFIG_SOFTMMU
36
int mem_index = get_mmuidx(oi);
37
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
38
cc = TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC;
39
tcg_out_bpcc0(s, COND_NE, BPCC_PN | cc, 0);
40
#else
41
- if (a_bits != s_bits) {
42
- /*
43
- * Test for at least natural alignment, and defer
44
- * everything else to the helper functions.
45
- */
46
+ /*
47
+ * If the size equals the required alignment, we can skip the test
48
+ * and allow host SIGBUS to deliver SIGBUS to the guest.
49
+ * Otherwise, test for at least natural alignment and defer
50
+ * everything else to the helper functions.
51
+ */
52
+ if (s_bits != get_alignment_bits(opc)) {
53
tcg_debug_assert(check_fit_tl(a_mask, 13));
54
tcg_out_arithi(s, TCG_REG_G0, addr_reg, a_mask, ARITH_ANDCC);
55
56
--
57
2.34.1
New patch
1
Use the x87 FPU to perform atomic 64-bit loads and stores.
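
A standalone illustration of the idea, not the patch's code generator:
on a 32-bit x86 host, fildll and fistpll each touch memory with a single
8-byte access, which x86 treats as atomic when the data is 8-byte aligned.
Assumes an i386 host and GCC/Clang extended asm; the function name is
invented for the sketch.

    #include <stdint.h>

    /* Sketch: one 8-byte x87 load, bounced through a local slot so the
       value can then be picked apart with ordinary 32-bit integer loads. */
    static inline uint64_t load_u64_via_x87(const uint64_t *p)
    {
        uint64_t val;
        asm("fildll %1\n\t"      /* push the 8-byte integer from *p */
            "fistpll %0"         /* pop it to val with one 8-byte store */
            : "=m"(val)
            : "m"(*p));
        return val;
    }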
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/i386/tcg-target.c.inc | 44 +++++++++++++++++++++++++++++++++------
7
1 file changed, 38 insertions(+), 6 deletions(-)
8
9
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/i386/tcg-target.c.inc
12
+++ b/tcg/i386/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
14
#define OPC_GRP5 (0xff)
15
#define OPC_GRP14 (0x73 | P_EXT | P_DATA16)
16
17
+#define OPC_ESCDF (0xdf)
18
+#define ESCDF_FILD_m64 5
19
+#define ESCDF_FISTP_m64 7
20
+
21
/* Group 1 opcode extensions for 0x80-0x83.
22
These are also used as modifiers for OPC_ARITH. */
23
#define ARITH_ADD 0
24
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
25
datalo = datahi;
26
datahi = t;
27
}
28
- if (h.base == datalo || h.index == datalo) {
29
+ if (h.aa.atom == MO_64) {
30
+ /*
31
+ * Atomicity requires that we use a single 8-byte load.
32
+ * For simplicity and code size, always use the FPU for this.
33
+ * Similar insns using SSE/AVX are merely larger.
34
+ * Load from memory in one go, then store back to the stack,
35
+ * from whence we can load into the correct integer regs.
36
+ */
37
+ tcg_out_modrm_sib_offset(s, OPC_ESCDF + h.seg, ESCDF_FILD_m64,
38
+ h.base, h.index, 0, h.ofs);
39
+ tcg_out_modrm_offset(s, OPC_ESCDF, ESCDF_FISTP_m64, TCG_REG_ESP, 0);
40
+ tcg_out_modrm_offset(s, movop, datalo, TCG_REG_ESP, 0);
41
+ tcg_out_modrm_offset(s, movop, datahi, TCG_REG_ESP, 4);
42
+ } else if (h.base == datalo || h.index == datalo) {
43
tcg_out_modrm_sib_offset(s, OPC_LEA, datahi,
44
h.base, h.index, 0, h.ofs);
45
tcg_out_modrm_offset(s, movop + h.seg, datalo, datahi, 0);
46
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
47
if (TCG_TARGET_REG_BITS == 64) {
48
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
49
h.base, h.index, 0, h.ofs);
50
+ break;
51
+ }
52
+ if (use_movbe) {
53
+ TCGReg t = datalo;
54
+ datalo = datahi;
55
+ datahi = t;
56
+ }
57
+ if (h.aa.atom == MO_64) {
58
+ /*
59
+ * Atomicity requires that we use one 8-byte store.
60
+ * For simplicity and code size, always use the FPU for this.
61
+ * Similar insns using SSE/AVX are merely larger.
62
+ * Assemble the 8-byte quantity in the required endianness
63
+ * on the stack, load to coproc unit, and store.
64
+ */
65
+ tcg_out_modrm_offset(s, movop, datalo, TCG_REG_ESP, 0);
66
+ tcg_out_modrm_offset(s, movop, datahi, TCG_REG_ESP, 4);
67
+ tcg_out_modrm_offset(s, OPC_ESCDF, ESCDF_FILD_m64, TCG_REG_ESP, 0);
68
+ tcg_out_modrm_sib_offset(s, OPC_ESCDF + h.seg, ESCDF_FISTP_m64,
69
+ h.base, h.index, 0, h.ofs);
70
} else {
71
- if (use_movbe) {
72
- TCGReg t = datalo;
73
- datalo = datahi;
74
- datahi = t;
75
- }
76
tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
77
h.base, h.index, 0, h.ofs);
78
tcg_out_modrm_sib_offset(s, movop + h.seg, datahi,
79
--
80
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/i386/tcg-target.h | 3 +-
5
tcg/i386/tcg-target.c.inc | 181 +++++++++++++++++++++++++++++++++++++-
6
2 files changed, 180 insertions(+), 4 deletions(-)
1
7
8
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/i386/tcg-target.h
11
+++ b/tcg/i386/tcg-target.h
12
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
13
#define TCG_TARGET_HAS_qemu_st8_i32 1
14
#endif
15
16
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
17
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
18
+ (TCG_TARGET_REG_BITS == 64 && have_atomic16)
19
20
/* We do not support older SSE systems, only beginning with AVX1. */
21
#define TCG_TARGET_HAS_v64 have_avx1
22
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
23
index XXXXXXX..XXXXXXX 100644
24
--- a/tcg/i386/tcg-target.c.inc
25
+++ b/tcg/i386/tcg-target.c.inc
26
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
27
#endif
28
};
29
30
+#define TCG_TMP_VEC TCG_REG_XMM5
31
+
32
static const int tcg_target_call_iarg_regs[] = {
33
#if TCG_TARGET_REG_BITS == 64
34
#if defined(_WIN64)
35
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
36
#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16)
37
#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16)
38
#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16)
39
+#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16)
40
+#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16)
41
#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
42
#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
43
#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)
44
@@ -XXX,XX +XXX,XX @@ typedef struct {
45
46
bool tcg_target_has_memory_bswap(MemOp memop)
47
{
48
- return have_movbe;
49
+ TCGAtomAlign aa;
50
+
51
+ if (!have_movbe) {
52
+ return false;
53
+ }
54
+ if ((memop & MO_SIZE) <= MO_64) {
55
+ return true;
56
+ }
57
+
58
+ /*
59
+ * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA,
60
+ * but do allow a pair of 64-bit operations, i.e. MOVBEQ.
61
+ */
62
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
63
+ return aa.atom <= MO_64;
64
}
65
66
/*
67
@@ -XXX,XX +XXX,XX @@ static const TCGLdstHelperParam ldst_helper_param = {
68
static const TCGLdstHelperParam ldst_helper_param = { };
69
#endif
70
71
+static void tcg_out_vec_to_pair(TCGContext *s, TCGType type,
72
+ TCGReg l, TCGReg h, TCGReg v)
73
+{
74
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
75
+
76
+ /* vpmov{d,q} %v, %l */
77
+ tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l);
78
+ /* vpextr{d,q} $1, %v, %h */
79
+ tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h);
80
+ tcg_out8(s, 1);
81
+}
82
+
83
+static void tcg_out_pair_to_vec(TCGContext *s, TCGType type,
84
+ TCGReg v, TCGReg l, TCGReg h)
85
+{
86
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
87
+
88
+ /* vmov{d,q} %l, %v */
89
+ tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l);
90
+ /* vpinsr{d,q} $1, %h, %v, %v */
91
+ tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h);
92
+ tcg_out8(s, 1);
93
+}
94
+
95
/*
96
* Generate code for the slow path for a load at the end of block
97
*/
98
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
99
{
100
TCGLabelQemuLdst *ldst = NULL;
101
MemOp opc = get_memop(oi);
102
+ MemOp s_bits = opc & MO_SIZE;
103
unsigned a_mask;
104
105
#ifdef CONFIG_SOFTMMU
106
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
107
*h = x86_guest_base;
108
#endif
109
h->base = addrlo;
110
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
111
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
112
a_mask = (1 << h->aa.align) - 1;
113
114
#ifdef CONFIG_SOFTMMU
115
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
116
TCGType tlbtype = TCG_TYPE_I32;
117
int trexw = 0, hrexw = 0, tlbrexw = 0;
118
unsigned mem_index = get_mmuidx(oi);
119
- unsigned s_bits = opc & MO_SIZE;
120
unsigned s_mask = (1 << s_bits) - 1;
121
target_ulong tlb_mask;
122
123
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
124
h.base, h.index, 0, h.ofs + 4);
125
}
126
break;
127
+
128
+ case MO_128:
129
+ {
130
+ TCGLabel *l1 = NULL, *l2 = NULL;
131
+ bool use_pair = h.aa.atom < MO_128;
132
+
133
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
134
+
135
+ if (!use_pair) {
136
+ tcg_debug_assert(!use_movbe);
137
+ /*
138
+ * Atomicity requires that we use VMOVDQA.
139
+ * If we've already checked for 16-byte alignment, that's all
140
+ * we need. If we arrive here with lesser alignment, then we
141
+ * have determined that less than 16-byte alignment can be
142
+ * satisfied with two 8-byte loads.
143
+ */
144
+ if (h.aa.align < MO_128) {
145
+ use_pair = true;
146
+ l1 = gen_new_label();
147
+ l2 = gen_new_label();
148
+
149
+ tcg_out_testi(s, h.base, 15);
150
+ tcg_out_jxx(s, JCC_JNE, l2, true);
151
+ }
152
+
153
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
154
+ TCG_TMP_VEC, 0,
155
+ h.base, h.index, 0, h.ofs);
156
+ tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo,
157
+ datahi, TCG_TMP_VEC);
158
+
159
+ if (use_pair) {
160
+ tcg_out_jxx(s, JCC_JMP, l1, true);
161
+ tcg_out_label(s, l2);
162
+ }
163
+ }
164
+ if (use_pair) {
165
+ if (use_movbe) {
166
+ TCGReg t = datalo;
167
+ datalo = datahi;
168
+ datahi = t;
169
+ }
170
+ if (h.base == datalo || h.index == datalo) {
171
+ tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi,
172
+ h.base, h.index, 0, h.ofs);
173
+ tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
174
+ datalo, datahi, 0);
175
+ tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
176
+ datahi, datahi, 8);
177
+ } else {
178
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
179
+ h.base, h.index, 0, h.ofs);
180
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
181
+ h.base, h.index, 0, h.ofs + 8);
182
+ }
183
+ }
184
+ if (l1) {
185
+ tcg_out_label(s, l1);
186
+ }
187
+ }
188
+ break;
189
+
190
default:
191
g_assert_not_reached();
192
}
193
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
194
h.base, h.index, 0, h.ofs + 4);
195
}
196
break;
197
+
198
+ case MO_128:
199
+ {
200
+ TCGLabel *l1 = NULL, *l2 = NULL;
201
+ bool use_pair = h.aa.atom < MO_128;
202
+
203
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
204
+
205
+ if (!use_pair) {
206
+ tcg_debug_assert(!use_movbe);
207
+ /*
208
+ * Atomicity requires that we use VMOVDQA.
209
+ * If we've already checked for 16-byte alignment, that's all
210
+ * we need. If we arrive here with lesser alignment, then we
211
+ * have determined that less than 16-byte alignment can be
212
+ * satisfied with two 8-byte loads.
213
+ */
214
+ if (h.aa.align < MO_128) {
215
+ use_pair = true;
216
+ l1 = gen_new_label();
217
+ l2 = gen_new_label();
218
+
219
+ tcg_out_testi(s, h.base, 15);
220
+ tcg_out_jxx(s, JCC_JNE, l2, true);
221
+ }
222
+
223
+ tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC,
224
+ datalo, datahi);
225
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
226
+ TCG_TMP_VEC, 0,
227
+ h.base, h.index, 0, h.ofs);
228
+
229
+ if (use_pair) {
230
+ tcg_out_jxx(s, JCC_JMP, l1, true);
231
+ tcg_out_label(s, l2);
232
+ }
233
+ }
234
+ if (use_pair) {
235
+ if (use_movbe) {
236
+ TCGReg t = datalo;
237
+ datalo = datahi;
238
+ datahi = t;
239
+ }
240
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
241
+ h.base, h.index, 0, h.ofs);
242
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
243
+ h.base, h.index, 0, h.ofs + 8);
244
+ }
245
+ if (l1) {
246
+ tcg_out_label(s, l1);
247
+ }
248
+ }
249
+ break;
250
+
251
default:
252
g_assert_not_reached();
253
}
254
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
255
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
256
}
257
break;
258
+ case INDEX_op_qemu_ld_i128:
259
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
260
+ tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
261
+ break;
262
case INDEX_op_qemu_st_i32:
263
case INDEX_op_qemu_st8_i32:
264
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
265
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
266
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
267
}
268
break;
269
+ case INDEX_op_qemu_st_i128:
270
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
271
+ tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
272
+ break;
273
274
OP_32_64(mulu2):
275
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
276
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
277
: TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(L, L, L)
278
: C_O0_I4(L, L, L, L));
279
280
+ case INDEX_op_qemu_ld_i128:
281
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
282
+ return C_O2_I1(r, r, L);
283
+ case INDEX_op_qemu_st_i128:
284
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
285
+ return C_O0_I3(L, L, L);
286
+
287
case INDEX_op_brcond2_i32:
288
return C_O0_I4(r, r, ri, ri);
289
290
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
291
292
s->reserved_regs = 0;
293
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
294
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC);
295
#ifdef _WIN64
296
/* These are call saved, and we don't save them, so don't use them. */
297
tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6);
298
--
299
2.34.1
New patch
1
We will need to allocate a second general-purpose temporary.
2
Rename the existing temps to add a distinguishing number.
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/aarch64/tcg-target.c.inc | 50 ++++++++++++++++++------------------
8
1 file changed, 25 insertions(+), 25 deletions(-)
9
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/aarch64/tcg-target.c.inc
13
+++ b/tcg/aarch64/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
15
bool have_lse;
16
bool have_lse2;
17
18
-#define TCG_REG_TMP TCG_REG_X30
19
-#define TCG_VEC_TMP TCG_REG_V31
20
+#define TCG_REG_TMP0 TCG_REG_X30
21
+#define TCG_VEC_TMP0 TCG_REG_V31
22
23
#ifndef CONFIG_SOFTMMU
24
/* Note that XZR cannot be encoded in the address base register slot,
25
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
26
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
27
TCGReg r, TCGReg base, intptr_t offset)
28
{
29
- TCGReg temp = TCG_REG_TMP;
30
+ TCGReg temp = TCG_REG_TMP0;
31
32
if (offset < -0xffffff || offset > 0xffffff) {
33
tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
35
}
36
37
/* Worst-case scenario, move offset to temp register, use reg offset. */
38
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
39
- tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
40
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
41
+ tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
42
}
43
44
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
45
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
46
if (offset == sextract64(offset, 0, 26)) {
47
tcg_out_insn(s, 3206, BL, offset);
48
} else {
49
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
50
- tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
51
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
52
+ tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
53
}
54
}
55
56
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
57
AArch64Insn insn;
58
59
if (rl == ah || (!const_bh && rl == bh)) {
60
- rl = TCG_REG_TMP;
61
+ rl = TCG_REG_TMP0;
62
}
63
64
if (const_bl) {
65
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
66
possibility of adding 0+const in the low part, and the
67
immediate add instructions encode XSP not XZR. Don't try
68
anything more elaborate here than loading another zero. */
69
- al = TCG_REG_TMP;
70
+ al = TCG_REG_TMP0;
71
tcg_out_movi(s, ext, al, 0);
72
}
73
tcg_out_insn_3401(s, insn, ext, rl, al, bl);
74
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
75
{
76
TCGReg a1 = a0;
77
if (is_ctz) {
78
- a1 = TCG_REG_TMP;
79
+ a1 = TCG_REG_TMP0;
80
tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
81
}
82
if (const_b && b == (ext ? 64 : 32)) {
83
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
84
AArch64Insn sel = I3506_CSEL;
85
86
tcg_out_cmp(s, ext, a0, 0, 1);
87
- tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
88
+ tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
89
90
if (const_b) {
91
if (b == -1) {
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
93
b = d;
94
}
95
}
96
- tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
97
+ tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
98
}
99
}
100
101
@@ -XXX,XX +XXX,XX @@ bool tcg_target_has_memory_bswap(MemOp memop)
102
}
103
104
static const TCGLdstHelperParam ldst_helper_param = {
105
- .ntmp = 1, .tmp = { TCG_REG_TMP }
106
+ .ntmp = 1, .tmp = { TCG_REG_TMP0 }
107
};
108
109
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
110
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
111
112
set_jmp_insn_offset(s, which);
113
tcg_out32(s, I3206_B);
114
- tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
115
+ tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
116
set_jmp_reset_offset(s, which);
117
}
118
119
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
120
ptrdiff_t i_offset = i_addr - jmp_rx;
121
122
/* Note that we asserted this in range in tcg_out_goto_tb. */
123
- insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
124
+ insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
125
}
126
qatomic_set((uint32_t *)jmp_rw, insn);
127
flush_idcache_range(jmp_rx, jmp_rw, 4);
128
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
129
130
case INDEX_op_rem_i64:
131
case INDEX_op_rem_i32:
132
- tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
133
- tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
134
+ tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
135
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
136
break;
137
case INDEX_op_remu_i64:
138
case INDEX_op_remu_i32:
139
- tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
140
- tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
141
+ tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
142
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
143
break;
144
145
case INDEX_op_shl_i64:
146
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
147
if (c2) {
148
tcg_out_rotl(s, ext, a0, a1, a2);
149
} else {
150
- tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
151
- tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
152
+ tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
153
+ tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
154
}
155
break;
156
157
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
158
break;
159
}
160
}
161
- tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
162
- a2 = TCG_VEC_TMP;
163
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
164
+ a2 = TCG_VEC_TMP0;
165
}
166
if (is_scalar) {
167
insn = cmp_scalar_insn[cond];
168
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
169
s->reserved_regs = 0;
170
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
171
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
172
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
173
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
174
- tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
175
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
176
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
177
}
178
179
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
180
--
181
2.34.1
New patch
1
Use LDXP+STXP when LSE2 is not present and 16-byte atomicity is required,
2
and LDP/STP otherwise. This requires allocating a second general-purpose
3
temporary, as Rs cannot overlap Rn in STXP.
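
For reference, a host-side sketch (not the TCG emitter) of the sequence the
backend generates without FEAT_LSE2: the exclusive pair only yields an
atomic 16-byte read if the paired store-exclusive succeeds, hence the retry
loop and the extra status register, and as the tcg-target.h comment in this
patch notes, it needs the memory to be writable. Assumes an AArch64 host,
little-endian value layout, and GCC/Clang extended asm with __int128; the
function name is invented for the sketch.

    #include <stdint.h>

    static inline unsigned __int128 ldxp_read16(unsigned __int128 *p)
    {
        uint64_t lo, hi;
        uint32_t fail;

        do {
            asm volatile("ldxp %0, %1, [%3]\n\t"   /* exclusive 16-byte load */
                         "stxp %w2, %0, %1, [%3]"  /* store back unchanged   */
                         : "=&r"(lo), "=&r"(hi), "=&r"(fail)
                         : "r"(p)
                         : "memory");
        } while (fail);
        return ((unsigned __int128)hi << 64) | lo;
    }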
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target-con-set.h | 2 +
9
tcg/aarch64/tcg-target.h | 11 +-
10
tcg/aarch64/tcg-target.c.inc | 179 ++++++++++++++++++++++++++++++-
11
3 files changed, 189 insertions(+), 3 deletions(-)
12
13
diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/aarch64/tcg-target-con-set.h
16
+++ b/tcg/aarch64/tcg-target-con-set.h
17
@@ -XXX,XX +XXX,XX @@ C_O0_I2(lZ, l)
18
C_O0_I2(r, rA)
19
C_O0_I2(rZ, r)
20
C_O0_I2(w, r)
21
+C_O0_I3(lZ, lZ, l)
22
C_O1_I1(r, l)
23
C_O1_I1(r, r)
24
C_O1_I1(w, r)
25
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wO)
26
C_O1_I2(w, w, wZ)
27
C_O1_I3(w, w, w, w)
28
C_O1_I4(r, r, rA, rZ, rZ)
29
+C_O2_I1(r, r, l)
30
C_O2_I4(r, r, rZ, rZ, rA, rMZ)
31
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tcg/aarch64/tcg-target.h
34
+++ b/tcg/aarch64/tcg-target.h
35
@@ -XXX,XX +XXX,XX @@ extern bool have_lse2;
36
#define TCG_TARGET_HAS_muluh_i64 1
37
#define TCG_TARGET_HAS_mulsh_i64 1
38
39
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
40
+/*
41
+ * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
42
+ * which requires writable pages. We must defer to the helper for user-only,
43
+ * but in system mode all ram is writable for the host.
44
+ */
45
+#ifdef CONFIG_USER_ONLY
46
+#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2
47
+#else
48
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
49
+#endif
50
51
#define TCG_TARGET_HAS_v64 1
52
#define TCG_TARGET_HAS_v128 1
53
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
54
index XXXXXXX..XXXXXXX 100644
55
--- a/tcg/aarch64/tcg-target.c.inc
56
+++ b/tcg/aarch64/tcg-target.c.inc
57
@@ -XXX,XX +XXX,XX @@ bool have_lse;
58
bool have_lse2;
59
60
#define TCG_REG_TMP0 TCG_REG_X30
61
+#define TCG_REG_TMP1 TCG_REG_X17
62
#define TCG_VEC_TMP0 TCG_REG_V31
63
64
#ifndef CONFIG_SOFTMMU
65
@@ -XXX,XX +XXX,XX @@ typedef enum {
66
I3305_LDR_v64 = 0x5c000000,
67
I3305_LDR_v128 = 0x9c000000,
68
69
+ /* Load/store exclusive. */
70
+ I3306_LDXP = 0xc8600000,
71
+ I3306_STXP = 0xc8200000,
72
+
73
/* Load/store register. Described here as 3.3.12, but the helper
74
that emits them can transform to 3.3.10 or 3.3.13. */
75
I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
76
@@ -XXX,XX +XXX,XX @@ typedef enum {
77
I3406_ADR = 0x10000000,
78
I3406_ADRP = 0x90000000,
79
80
+ /* Add/subtract extended register instructions. */
81
+ I3501_ADD = 0x0b200000,
82
+
83
/* Add/subtract shifted register instructions (without a shift). */
84
I3502_ADD = 0x0b000000,
85
I3502_ADDS = 0x2b000000,
86
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
87
tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
88
}
89
90
+static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
91
+ TCGReg rt, TCGReg rt2, TCGReg rn)
92
+{
93
+ tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
94
+}
95
+
96
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
97
TCGReg rt, int imm19)
98
{
99
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
100
tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
101
}
102
103
+static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
104
+ TCGType sf, TCGReg rd, TCGReg rn,
105
+ TCGReg rm, int opt, int imm3)
106
+{
107
+ tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
108
+ imm3 << 10 | rn << 5 | rd);
109
+}
110
+
111
/* This function is for both 3.5.2 (Add/Subtract shifted register), for
112
the rare occasion when we actually want to supply a shift amount. */
113
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
114
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
115
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
116
TCGLabelQemuLdst *ldst = NULL;
117
MemOp opc = get_memop(oi);
118
+ MemOp s_bits = opc & MO_SIZE;
119
unsigned a_mask;
120
121
h->aa = atom_and_align_for_opc(s, opc,
122
have_lse2 ? MO_ATOM_WITHIN16
123
: MO_ATOM_IFALIGN,
124
- false);
125
+ s_bits == MO_128);
126
a_mask = (1 << h->aa.align) - 1;
127
128
#ifdef CONFIG_SOFTMMU
129
- unsigned s_bits = opc & MO_SIZE;
130
unsigned s_mask = (1u << s_bits) - 1;
131
unsigned mem_index = get_mmuidx(oi);
132
TCGReg x3;
133
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
134
}
135
}
136
137
+static TCGLabelQemuLdst *
138
+prepare_host_addr_base_only(TCGContext *s, HostAddress *h, TCGReg addr_reg,
139
+ MemOpIdx oi, bool is_ld)
140
+{
141
+ TCGLabelQemuLdst *ldst;
142
+
143
+ ldst = prepare_host_addr(s, h, addr_reg, oi, true);
144
+
145
+ /* Compose the final address, as LDP/STP have no indexing. */
146
+ if (h->index != TCG_REG_XZR) {
147
+ tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, TCG_REG_TMP0,
148
+ h->base, h->index,
149
+ h->index_ext == TCG_TYPE_I32 ? MO_32 : MO_64, 0);
150
+ h->base = TCG_REG_TMP0;
151
+ h->index = TCG_REG_XZR;
152
+ h->index_ext = TCG_TYPE_I64;
153
+ }
154
+
155
+ return ldst;
156
+}
157
+
158
+static void tcg_out_qemu_ld128(TCGContext *s, TCGReg datalo, TCGReg datahi,
159
+ TCGReg addr_reg, MemOpIdx oi)
160
+{
161
+ TCGLabelQemuLdst *ldst;
162
+ HostAddress h;
163
+
164
+ ldst = prepare_host_addr_base_only(s, &h, addr_reg, oi, true);
165
+
166
+ if (h.aa.atom < MO_128 || have_lse2) {
167
+ tcg_out_insn(s, 3314, LDP, datalo, datahi, h.base, 0, 0, 0);
168
+ } else {
169
+ TCGLabel *l0, *l1 = NULL;
170
+
171
+ /*
172
+ * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
173
+ * 1: ldxp lo,hi,[addr]
174
+ * stxp tmp1,lo,hi,[addr]
175
+ * cbnz tmp1, 1b
176
+ *
177
+ * If we have already checked for 16-byte alignment, that's all
178
+ * we need. Otherwise we have determined that misaligned atomicity
179
+ * may be handled with two 8-byte loads.
180
+ */
181
+ if (h.aa.align < MO_128) {
182
+ /*
183
+ * TODO: align should be MO_64, so we only need test bit 3,
184
+ * which means we could use TBNZ instead of AND+CBNZ.
185
+ */
186
+ l1 = gen_new_label();
187
+ tcg_out_logicali(s, I3404_ANDI, 0, TCG_REG_TMP1, addr_reg, 15);
188
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE,
189
+ TCG_REG_TMP1, 0, 1, l1);
190
+ }
191
+
192
+ l0 = gen_new_label();
193
+ tcg_out_label(s, l0);
194
+
195
+ tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, datalo, datahi, h.base);
196
+ tcg_out_insn(s, 3306, STXP, TCG_REG_TMP1, datalo, datahi, h.base);
197
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE, TCG_REG_TMP1, 0, 1, l0);
198
+
199
+ if (l1) {
200
+ TCGLabel *l2 = gen_new_label();
201
+ tcg_out_goto_label(s, l2);
202
+
203
+ tcg_out_label(s, l1);
204
+ tcg_out_insn(s, 3314, LDP, datalo, datahi, h.base, 0, 0, 0);
205
+
206
+ tcg_out_label(s, l2);
207
+ }
208
+ }
209
+
210
+ if (ldst) {
211
+ ldst->type = TCG_TYPE_I128;
212
+ ldst->datalo_reg = datalo;
213
+ ldst->datahi_reg = datahi;
214
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
215
+ }
216
+}
217
+
218
+static void tcg_out_qemu_st128(TCGContext *s, TCGReg datalo, TCGReg datahi,
219
+ TCGReg addr_reg, MemOpIdx oi)
220
+{
221
+ TCGLabelQemuLdst *ldst;
222
+ HostAddress h;
223
+
224
+ ldst = prepare_host_addr_base_only(s, &h, addr_reg, oi, false);
225
+
226
+ if (h.aa.atom < MO_128 || have_lse2) {
227
+ tcg_out_insn(s, 3314, STP, datalo, datahi, h.base, 0, 0, 0);
228
+ } else {
229
+ TCGLabel *l0, *l1 = NULL;
230
+
231
+ /*
232
+ * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
233
+ * 1: ldxp xzr,tmp1,[addr]
234
+ * stxp tmp1,lo,hi,[addr]
235
+ * cbnz tmp1, 1b
236
+ *
237
+ * If we have already checked for 16-byte alignment, that's all
238
+ * we need. Otherwise we have determined that misaligned atomicity
239
+ * may be handled with two 8-byte stores.
240
+ */
241
+ if (h.aa.align < MO_128) {
242
+ /*
243
+ * TODO: align should be MO_64, so we only need test bit 3,
244
+ * which means we could use TBNZ instead of AND+CBNZ.
245
+ */
246
+ l1 = gen_new_label();
247
+ tcg_out_logicali(s, I3404_ANDI, 0, TCG_REG_TMP1, addr_reg, 15);
248
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE,
249
+ TCG_REG_TMP1, 0, 1, l1);
250
+ }
251
+
252
+ l0 = gen_new_label();
253
+ tcg_out_label(s, l0);
254
+
255
+ tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR,
256
+ TCG_REG_XZR, TCG_REG_TMP1, h.base);
257
+ tcg_out_insn(s, 3306, STXP, TCG_REG_TMP1, datalo, datahi, h.base);
258
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE, TCG_REG_TMP1, 0, 1, l0);
259
+
260
+ if (l1) {
261
+ TCGLabel *l2 = gen_new_label();
262
+ tcg_out_goto_label(s, l2);
263
+
264
+ tcg_out_label(s, l1);
265
+ tcg_out_insn(s, 3314, STP, datalo, datahi, h.base, 0, 0, 0);
266
+
267
+ tcg_out_label(s, l2);
268
+ }
269
+ }
270
+
271
+ if (ldst) {
272
+ ldst->type = TCG_TYPE_I128;
273
+ ldst->datalo_reg = datalo;
274
+ ldst->datahi_reg = datahi;
275
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
276
+ }
277
+}
278
+
279
static const tcg_insn_unit *tb_ret_addr;
280
281
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
282
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
283
case INDEX_op_qemu_st_i64:
284
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
285
break;
286
+ case INDEX_op_qemu_ld_i128:
287
+ tcg_out_qemu_ld128(s, a0, a1, a2, args[3]);
288
+ break;
289
+ case INDEX_op_qemu_st_i128:
290
+ tcg_out_qemu_st128(s, REG0(0), REG0(1), a2, args[3]);
291
+ break;
292
293
case INDEX_op_bswap64_i64:
294
tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
295
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
296
case INDEX_op_qemu_ld_i32:
297
case INDEX_op_qemu_ld_i64:
298
return C_O1_I1(r, l);
299
+ case INDEX_op_qemu_ld_i128:
300
+ return C_O2_I1(r, r, l);
301
case INDEX_op_qemu_st_i32:
302
case INDEX_op_qemu_st_i64:
303
return C_O0_I2(lZ, l);
304
+ case INDEX_op_qemu_st_i128:
305
+ return C_O0_I3(lZ, lZ, l);
306
307
case INDEX_op_deposit_i32:
308
case INDEX_op_deposit_i64:
309
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
310
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
311
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
312
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
313
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
314
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
315
}
316
317
--
318
2.34.1
New patch
1
Use LQ/STQ when ISA v2.07 is available and 16-byte atomicity is required.
2
Note that these instructions do not require 16-byte alignment.
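
As a sketch (not part of the patch; register numbers illustrative), the
inline path with full 16-byte atomicity is a single quadword access on an
even/odd register pair:

  lq   r8, 0(r10)      # r8:r9 <- 16 bytes in one access
  stq  r8, 0(r10)

Without that requirement, a pair of 8-byte LD/STD (or LDBRX/STDBRX for
byte-swapped accesses) is emitted instead.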
1
3
4
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/ppc/tcg-target-con-set.h | 2 +
8
tcg/ppc/tcg-target-con-str.h | 1 +
9
tcg/ppc/tcg-target.h | 3 +-
10
tcg/ppc/tcg-target.c.inc | 115 +++++++++++++++++++++++++++++++----
11
4 files changed, 108 insertions(+), 13 deletions(-)
12
13
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/ppc/tcg-target-con-set.h
16
+++ b/tcg/ppc/tcg-target-con-set.h
17
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r)
18
C_O0_I2(r, ri)
19
C_O0_I2(v, r)
20
C_O0_I3(r, r, r)
21
+C_O0_I3(o, m, r)
22
C_O0_I4(r, r, ri, ri)
23
C_O0_I4(r, r, r, r)
24
C_O1_I1(r, r)
25
@@ -XXX,XX +XXX,XX @@ C_O1_I3(v, v, v, v)
26
C_O1_I4(r, r, ri, rZ, rZ)
27
C_O1_I4(r, r, r, ri, ri)
28
C_O2_I1(r, r, r)
29
+C_O2_I1(o, m, r)
30
C_O2_I2(r, r, r, r)
31
C_O2_I4(r, r, rI, rZM, r, r)
32
C_O2_I4(r, r, r, r, rI, rZM)
33
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/tcg/ppc/tcg-target-con-str.h
36
+++ b/tcg/ppc/tcg-target-con-str.h
37
@@ -XXX,XX +XXX,XX @@
38
* REGS(letter, register_mask)
39
*/
40
REGS('r', ALL_GENERAL_REGS)
41
+REGS('o', ALL_GENERAL_REGS & 0xAAAAAAAAu) /* odd registers */
42
REGS('v', ALL_VECTOR_REGS)
43
44
/*
45
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/ppc/tcg-target.h
48
+++ b/tcg/ppc/tcg-target.h
49
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
50
#define TCG_TARGET_HAS_mulsh_i64 1
51
#endif
52
53
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
54
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
55
+ (TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
56
57
/*
58
* While technically Altivec could support V64, it has no 64-bit store
59
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
60
index XXXXXXX..XXXXXXX 100644
61
--- a/tcg/ppc/tcg-target.c.inc
62
+++ b/tcg/ppc/tcg-target.c.inc
63
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
64
65
#define B OPCD( 18)
66
#define BC OPCD( 16)
67
+
68
#define LBZ OPCD( 34)
69
#define LHZ OPCD( 40)
70
#define LHA OPCD( 42)
71
#define LWZ OPCD( 32)
72
#define LWZUX XO31( 55)
73
-#define STB OPCD( 38)
74
-#define STH OPCD( 44)
75
-#define STW OPCD( 36)
76
-
77
-#define STD XO62( 0)
78
-#define STDU XO62( 1)
79
-#define STDX XO31(149)
80
-
81
#define LD XO58( 0)
82
#define LDX XO31( 21)
83
#define LDU XO58( 1)
84
#define LDUX XO31( 53)
85
#define LWA XO58( 2)
86
#define LWAX XO31(341)
87
+#define LQ OPCD( 56)
88
+
89
+#define STB OPCD( 38)
90
+#define STH OPCD( 44)
91
+#define STW OPCD( 36)
92
+#define STD XO62( 0)
93
+#define STDU XO62( 1)
94
+#define STDX XO31(149)
95
+#define STQ XO62( 2)
96
97
#define ADDIC OPCD( 12)
98
#define ADDI OPCD( 14)
99
@@ -XXX,XX +XXX,XX @@ typedef struct {
100
101
bool tcg_target_has_memory_bswap(MemOp memop)
102
{
103
- return true;
104
+ TCGAtomAlign aa;
105
+
106
+ if ((memop & MO_SIZE) <= MO_64) {
107
+ return true;
108
+ }
109
+
110
+ /*
111
+ * Reject 16-byte memop with 16-byte atomicity,
112
+ * but do allow a pair of 64-bit operations.
113
+ */
114
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
115
+ return aa.atom <= MO_64;
116
}
117
118
/*
119
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
120
{
121
TCGLabelQemuLdst *ldst = NULL;
122
MemOp opc = get_memop(oi);
123
- MemOp a_bits;
124
+ MemOp a_bits, s_bits;
125
126
/*
127
* Book II, Section 1.4, Single-Copy Atomicity, specifies:
128
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
129
* As of 3.0, "the non-atomic access is performed as described in
130
* the corresponding list", which matches MO_ATOM_SUBALIGN.
131
*/
132
+ s_bits = opc & MO_SIZE;
133
h->aa = atom_and_align_for_opc(s, opc,
134
have_isa_3_00 ? MO_ATOM_SUBALIGN
135
: MO_ATOM_IFALIGN,
136
- false);
137
+ s_bits == MO_128);
138
a_bits = h->aa.align;
139
140
#ifdef CONFIG_SOFTMMU
141
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
142
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
143
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
144
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
145
- unsigned s_bits = opc & MO_SIZE;
146
147
ldst = new_ldst_label(s);
148
ldst->is_ld = is_ld;
149
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
150
}
151
}
152
153
+static TCGLabelQemuLdst *
154
+prepare_host_addr_index_only(TCGContext *s, HostAddress *h, TCGReg addr_reg,
155
+ MemOpIdx oi, bool is_ld)
156
+{
157
+ TCGLabelQemuLdst *ldst;
158
+
159
+ ldst = prepare_host_addr(s, h, addr_reg, -1, oi, true);
160
+
161
+ /* Compose the final address, as LQ/STQ have no indexing. */
162
+ if (h->base != 0) {
163
+ tcg_out32(s, ADD | TAB(TCG_REG_TMP1, h->base, h->index));
164
+ h->index = TCG_REG_TMP1;
165
+ h->base = 0;
166
+ }
167
+
168
+ return ldst;
169
+}
170
+
171
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
172
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
173
+{
174
+ TCGLabelQemuLdst *ldst;
175
+ HostAddress h;
176
+ bool need_bswap;
177
+ uint32_t insn;
178
+
179
+ ldst = prepare_host_addr_index_only(s, &h, addr_reg, oi, is_ld);
180
+ need_bswap = get_memop(oi) & MO_BSWAP;
181
+
182
+ if (h.aa.atom == MO_128) {
183
+ tcg_debug_assert(!need_bswap);
184
+ tcg_debug_assert(datalo & 1);
185
+ tcg_debug_assert(datahi == datalo - 1);
186
+ insn = is_ld ? LQ : STQ;
187
+ tcg_out32(s, insn | TAI(datahi, h.index, 0));
188
+ } else {
189
+ TCGReg d1, d2;
190
+
191
+ if (HOST_BIG_ENDIAN ^ need_bswap) {
192
+ d1 = datahi, d2 = datalo;
193
+ } else {
194
+ d1 = datalo, d2 = datahi;
195
+ }
196
+
197
+ if (need_bswap) {
198
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
199
+ insn = is_ld ? LDBRX : STDBRX;
200
+ tcg_out32(s, insn | TAB(d1, 0, h.index));
201
+ tcg_out32(s, insn | TAB(d2, h.index, TCG_REG_R0));
202
+ } else {
203
+ insn = is_ld ? LD : STD;
204
+ tcg_out32(s, insn | TAI(d1, h.index, 0));
205
+ tcg_out32(s, insn | TAI(d2, h.index, 8));
206
+ }
207
+ }
208
+
209
+ if (ldst) {
210
+ ldst->type = TCG_TYPE_I128;
211
+ ldst->datalo_reg = datalo;
212
+ ldst->datahi_reg = datahi;
213
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
214
+ }
215
+}
216
+
217
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
218
{
219
int i;
220
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
221
args[4], TCG_TYPE_I64);
222
}
223
break;
224
+ case INDEX_op_qemu_ld_i128:
225
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
226
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
227
+ break;
228
+
229
case INDEX_op_qemu_st_i32:
230
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
231
tcg_out_qemu_st(s, args[0], -1, args[1], -1,
232
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
233
args[4], TCG_TYPE_I64);
234
}
235
break;
236
+ case INDEX_op_qemu_st_i128:
237
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
238
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
239
+ break;
240
241
case INDEX_op_setcond_i32:
242
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
243
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
244
: TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
245
: C_O0_I4(r, r, r, r));
246
247
+ case INDEX_op_qemu_ld_i128:
248
+ return C_O2_I1(o, m, r);
249
+ case INDEX_op_qemu_st_i128:
250
+ return C_O0_I3(o, m, r);
251
+
252
case INDEX_op_add_vec:
253
case INDEX_op_sub_vec:
254
case INDEX_op_mul_vec:
255
--
256
2.34.1
New patch
1
Use LPQ/STPQ when 16-byte atomicity is required.
2
Note that these instructions require 16-byte alignment.
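
As a sketch (not part of the patch; register numbers illustrative), the
aligned, fully atomic path is a single quadword access on an even/odd
register pair:

  lpq   %r2, 0(%r4)    # %r2:%r3 <- 16 bytes in one access
  stpq  %r2, 0(%r4)

When the address turns out to be less than 16-byte aligned, the code falls
back to a pair of 8-byte LG/STG (or LRVG/STRVG for byte-swapped accesses).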
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/s390x/tcg-target-con-set.h | 2 +
8
tcg/s390x/tcg-target.h | 2 +-
9
tcg/s390x/tcg-target.c.inc | 103 ++++++++++++++++++++++++++++++++-
10
3 files changed, 103 insertions(+), 4 deletions(-)
11
12
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/s390x/tcg-target-con-set.h
15
+++ b/tcg/s390x/tcg-target-con-set.h
16
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r)
17
C_O0_I2(r, ri)
18
C_O0_I2(r, rA)
19
C_O0_I2(v, r)
20
+C_O0_I3(o, m, r)
21
C_O1_I1(r, r)
22
C_O1_I1(v, r)
23
C_O1_I1(v, v)
24
@@ -XXX,XX +XXX,XX @@ C_O1_I2(v, v, v)
25
C_O1_I3(v, v, v, v)
26
C_O1_I4(r, r, ri, rI, r)
27
C_O1_I4(r, r, rA, rI, r)
28
+C_O2_I1(o, m, r)
29
C_O2_I2(o, m, 0, r)
30
C_O2_I2(o, m, r, r)
31
C_O2_I3(o, m, 0, 1, r)
32
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/s390x/tcg-target.h
35
+++ b/tcg/s390x/tcg-target.h
36
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
37
#define TCG_TARGET_HAS_muluh_i64 0
38
#define TCG_TARGET_HAS_mulsh_i64 0
39
40
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
41
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
42
43
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
44
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
45
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/s390x/tcg-target.c.inc
48
+++ b/tcg/s390x/tcg-target.c.inc
49
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
50
RXY_LLGF = 0xe316,
51
RXY_LLGH = 0xe391,
52
RXY_LMG = 0xeb04,
53
+ RXY_LPQ = 0xe38f,
54
RXY_LRV = 0xe31e,
55
RXY_LRVG = 0xe30f,
56
RXY_LRVH = 0xe31f,
57
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
58
RXY_STG = 0xe324,
59
RXY_STHY = 0xe370,
60
RXY_STMG = 0xeb24,
61
+ RXY_STPQ = 0xe38e,
62
RXY_STRV = 0xe33e,
63
RXY_STRVG = 0xe32f,
64
RXY_STRVH = 0xe33f,
65
@@ -XXX,XX +XXX,XX @@ typedef struct {
66
67
bool tcg_target_has_memory_bswap(MemOp memop)
68
{
69
- return true;
70
+ TCGAtomAlign aa;
71
+
72
+ if ((memop & MO_SIZE) <= MO_64) {
73
+ return true;
74
+ }
75
+
76
+ /*
77
+ * Reject 16-byte memop with 16-byte atomicity,
78
+ * but do allow a pair of 64-bit operations.
79
+ */
80
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
81
+ return aa.atom <= MO_64;
82
}
83
84
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
85
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
86
{
87
TCGLabelQemuLdst *ldst = NULL;
88
MemOp opc = get_memop(oi);
89
+ MemOp s_bits = opc & MO_SIZE;
90
unsigned a_mask;
91
92
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
93
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
94
a_mask = (1 << h->aa.align) - 1;
95
96
#ifdef CONFIG_SOFTMMU
97
- unsigned s_bits = opc & MO_SIZE;
98
unsigned s_mask = (1 << s_bits) - 1;
99
int mem_index = get_mmuidx(oi);
100
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
101
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
102
}
103
}
104
105
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
106
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
107
+{
108
+ TCGLabel *l1 = NULL, *l2 = NULL;
109
+ TCGLabelQemuLdst *ldst;
110
+ HostAddress h;
111
+ bool need_bswap;
112
+ bool use_pair;
113
+ S390Opcode insn;
114
+
115
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
116
+
117
+ use_pair = h.aa.atom < MO_128;
118
+ need_bswap = get_memop(oi) & MO_BSWAP;
119
+
120
+ if (!use_pair) {
121
+ /*
122
+ * Atomicity requires we use LPQ. If we've already checked for
123
+ * 16-byte alignment, that's all we need. If we arrive with
124
+ * lesser alignment, we have determined that less than 16-byte
125
+ * alignment can be satisfied with two 8-byte loads.
126
+ */
127
+ if (h.aa.align < MO_128) {
128
+ use_pair = true;
129
+ l1 = gen_new_label();
130
+ l2 = gen_new_label();
131
+
132
+ tcg_out_insn(s, RI, TMLL, addr_reg, 15);
133
+ tgen_branch(s, 7, l1); /* CC in {1,2,3} */
134
+ }
135
+
136
+ tcg_debug_assert(!need_bswap);
137
+ tcg_debug_assert(datalo & 1);
138
+ tcg_debug_assert(datahi == datalo - 1);
139
+ insn = is_ld ? RXY_LPQ : RXY_STPQ;
140
+ tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);
141
+
142
+ if (use_pair) {
143
+ tgen_branch(s, S390_CC_ALWAYS, l2);
144
+ tcg_out_label(s, l1);
145
+ }
146
+ }
147
+ if (use_pair) {
148
+ TCGReg d1, d2;
149
+
150
+ if (need_bswap) {
151
+ d1 = datalo, d2 = datahi;
152
+ insn = is_ld ? RXY_LRVG : RXY_STRVG;
153
+ } else {
154
+ d1 = datahi, d2 = datalo;
155
+ insn = is_ld ? RXY_LG : RXY_STG;
156
+ }
157
+
158
+ if (h.base == d1 || h.index == d1) {
159
+ tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
160
+ h.base = TCG_TMP0;
161
+ h.index = TCG_REG_NONE;
162
+ h.disp = 0;
163
+ }
164
+ tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
165
+ tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
166
+ }
167
+ if (l2) {
168
+ tcg_out_label(s, l2);
169
+ }
170
+
171
+ if (ldst) {
172
+ ldst->type = TCG_TYPE_I128;
173
+ ldst->datalo_reg = datalo;
174
+ ldst->datahi_reg = datahi;
175
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
176
+ }
177
+}
178
+
179
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
180
{
181
/* Reuse the zeroing that exists for goto_ptr. */
182
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
183
case INDEX_op_qemu_st_i64:
184
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
185
break;
186
+ case INDEX_op_qemu_ld_i128:
187
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
188
+ break;
189
+ case INDEX_op_qemu_st_i128:
190
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
191
+ break;
192
193
case INDEX_op_ld16s_i64:
194
tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
195
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
196
case INDEX_op_qemu_st_i64:
197
case INDEX_op_qemu_st_i32:
198
return C_O0_I2(r, r);
199
+ case INDEX_op_qemu_ld_i128:
200
+ return C_O2_I1(o, m, r);
201
+ case INDEX_op_qemu_st_i128:
202
+ return C_O0_I3(o, m, r);
203
204
case INDEX_op_deposit_i32:
205
case INDEX_op_deposit_i64:
206
--
207
2.34.1
New patch
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/tcg-op-ldst.c | 1006 +++++++++++++++++++++++++++++++++++++++++++++
5
tcg/tcg-op.c | 974 -------------------------------------------
6
tcg/meson.build | 1 +
7
3 files changed, 1007 insertions(+), 974 deletions(-)
8
create mode 100644 tcg/tcg-op-ldst.c
1
9
10
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
11
new file mode 100644
12
index XXXXXXX..XXXXXXX
13
--- /dev/null
14
+++ b/tcg/tcg-op-ldst.c
15
@@ -XXX,XX +XXX,XX @@
16
+/*
17
+ * Tiny Code Generator for QEMU
18
+ *
19
+ * Copyright (c) 2008 Fabrice Bellard
20
+ *
21
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
22
+ * of this software and associated documentation files (the "Software"), to deal
23
+ * in the Software without restriction, including without limitation the rights
24
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25
+ * copies of the Software, and to permit persons to whom the Software is
26
+ * furnished to do so, subject to the following conditions:
27
+ *
28
+ * The above copyright notice and this permission notice shall be included in
29
+ * all copies or substantial portions of the Software.
30
+ *
31
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
34
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
37
+ * THE SOFTWARE.
38
+ */
39
+
40
+#include "qemu/osdep.h"
41
+#include "exec/exec-all.h"
42
+#include "tcg/tcg.h"
43
+#include "tcg/tcg-temp-internal.h"
44
+#include "tcg/tcg-op.h"
45
+#include "tcg/tcg-mo.h"
46
+#include "exec/plugin-gen.h"
47
+#include "tcg-internal.h"
48
+
49
+
50
+static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
51
+{
52
+ /* Trigger the asserts within as early as possible. */
53
+ unsigned a_bits = get_alignment_bits(op);
54
+
55
+ /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
56
+ if (a_bits == (op & MO_SIZE)) {
57
+ op = (op & ~MO_AMASK) | MO_ALIGN;
58
+ }
59
+
60
+ switch (op & MO_SIZE) {
61
+ case MO_8:
62
+ op &= ~MO_BSWAP;
63
+ break;
64
+ case MO_16:
65
+ break;
66
+ case MO_32:
67
+ if (!is64) {
68
+ op &= ~MO_SIGN;
69
+ }
70
+ break;
71
+ case MO_64:
72
+ if (is64) {
73
+ op &= ~MO_SIGN;
74
+ break;
75
+ }
76
+ /* fall through */
77
+ default:
78
+ g_assert_not_reached();
79
+ }
80
+ if (st) {
81
+ op &= ~MO_SIGN;
82
+ }
83
+ return op;
84
+}
85
+
86
+static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
87
+ MemOp memop, TCGArg idx)
88
+{
89
+ MemOpIdx oi = make_memop_idx(memop, idx);
90
+#if TARGET_LONG_BITS == 32
91
+ tcg_gen_op3i_i32(opc, val, addr, oi);
92
+#else
93
+ if (TCG_TARGET_REG_BITS == 32) {
94
+ tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
95
+ } else {
96
+ tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
97
+ }
98
+#endif
99
+}
100
+
101
+static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
102
+ MemOp memop, TCGArg idx)
103
+{
104
+ MemOpIdx oi = make_memop_idx(memop, idx);
105
+#if TARGET_LONG_BITS == 32
106
+ if (TCG_TARGET_REG_BITS == 32) {
107
+ tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
108
+ } else {
109
+ tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
110
+ }
111
+#else
112
+ if (TCG_TARGET_REG_BITS == 32) {
113
+ tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
114
+ TCGV_LOW(addr), TCGV_HIGH(addr), oi);
115
+ } else {
116
+ tcg_gen_op3i_i64(opc, val, addr, oi);
117
+ }
118
+#endif
119
+}
120
+
121
+static void tcg_gen_req_mo(TCGBar type)
122
+{
123
+#ifdef TCG_GUEST_DEFAULT_MO
124
+ type &= TCG_GUEST_DEFAULT_MO;
125
+#endif
126
+ type &= ~TCG_TARGET_DEFAULT_MO;
127
+ if (type) {
128
+ tcg_gen_mb(type | TCG_BAR_SC);
129
+ }
130
+}
131
+
132
+static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
133
+{
134
+#ifdef CONFIG_PLUGIN
135
+ if (tcg_ctx->plugin_insn != NULL) {
136
+ /* Save a copy of the vaddr for use after a load. */
137
+ TCGv temp = tcg_temp_new();
138
+ tcg_gen_mov_tl(temp, vaddr);
139
+ return temp;
140
+ }
141
+#endif
142
+ return vaddr;
143
+}
144
+
145
+static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
146
+ enum qemu_plugin_mem_rw rw)
147
+{
148
+#ifdef CONFIG_PLUGIN
149
+ if (tcg_ctx->plugin_insn != NULL) {
150
+ qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
151
+ plugin_gen_empty_mem_callback(vaddr, info);
152
+ tcg_temp_free(vaddr);
153
+ }
154
+#endif
155
+}
156
+
157
+void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
158
+{
159
+ MemOp orig_memop;
160
+ MemOpIdx oi;
161
+
162
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
163
+ memop = tcg_canonicalize_memop(memop, 0, 0);
164
+ oi = make_memop_idx(memop, idx);
165
+
166
+ orig_memop = memop;
167
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
168
+ memop &= ~MO_BSWAP;
169
+ /* The bswap primitive benefits from zero-extended input. */
170
+ if ((memop & MO_SSIZE) == MO_SW) {
171
+ memop &= ~MO_SIGN;
172
+ }
173
+ }
174
+
175
+ addr = plugin_prep_mem_callbacks(addr);
176
+ gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
177
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
178
+
179
+ if ((orig_memop ^ memop) & MO_BSWAP) {
180
+ switch (orig_memop & MO_SIZE) {
181
+ case MO_16:
182
+ tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
183
+ ? TCG_BSWAP_IZ | TCG_BSWAP_OS
184
+ : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
185
+ break;
186
+ case MO_32:
187
+ tcg_gen_bswap32_i32(val, val);
188
+ break;
189
+ default:
190
+ g_assert_not_reached();
191
+ }
192
+ }
193
+}
194
+
195
+void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
196
+{
197
+ TCGv_i32 swap = NULL;
198
+ MemOpIdx oi;
199
+
200
+ tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
201
+ memop = tcg_canonicalize_memop(memop, 0, 1);
202
+ oi = make_memop_idx(memop, idx);
203
+
204
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
205
+ swap = tcg_temp_ebb_new_i32();
206
+ switch (memop & MO_SIZE) {
207
+ case MO_16:
208
+ tcg_gen_bswap16_i32(swap, val, 0);
209
+ break;
210
+ case MO_32:
211
+ tcg_gen_bswap32_i32(swap, val);
212
+ break;
213
+ default:
214
+ g_assert_not_reached();
215
+ }
216
+ val = swap;
217
+ memop &= ~MO_BSWAP;
218
+ }
219
+
220
+ addr = plugin_prep_mem_callbacks(addr);
221
+ if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
222
+ gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
223
+ } else {
224
+ gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
225
+ }
226
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
227
+
228
+ if (swap) {
229
+ tcg_temp_free_i32(swap);
230
+ }
231
+}
232
+
233
+void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
234
+{
235
+ MemOp orig_memop;
236
+ MemOpIdx oi;
237
+
238
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
239
+ tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
240
+ if (memop & MO_SIGN) {
241
+ tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
242
+ } else {
243
+ tcg_gen_movi_i32(TCGV_HIGH(val), 0);
244
+ }
245
+ return;
246
+ }
247
+
248
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
249
+ memop = tcg_canonicalize_memop(memop, 1, 0);
250
+ oi = make_memop_idx(memop, idx);
251
+
252
+ orig_memop = memop;
253
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
254
+ memop &= ~MO_BSWAP;
255
+ /* The bswap primitive benefits from zero-extended input. */
256
+ if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
257
+ memop &= ~MO_SIGN;
258
+ }
259
+ }
260
+
261
+ addr = plugin_prep_mem_callbacks(addr);
262
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
263
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
264
+
265
+ if ((orig_memop ^ memop) & MO_BSWAP) {
266
+ int flags = (orig_memop & MO_SIGN
267
+ ? TCG_BSWAP_IZ | TCG_BSWAP_OS
268
+ : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
269
+ switch (orig_memop & MO_SIZE) {
270
+ case MO_16:
271
+ tcg_gen_bswap16_i64(val, val, flags);
272
+ break;
273
+ case MO_32:
274
+ tcg_gen_bswap32_i64(val, val, flags);
275
+ break;
276
+ case MO_64:
277
+ tcg_gen_bswap64_i64(val, val);
278
+ break;
279
+ default:
280
+ g_assert_not_reached();
281
+ }
282
+ }
283
+}
284
+
285
+void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
286
+{
287
+ TCGv_i64 swap = NULL;
288
+ MemOpIdx oi;
289
+
290
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
291
+ tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
292
+ return;
293
+ }
294
+
295
+ tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
296
+ memop = tcg_canonicalize_memop(memop, 1, 1);
297
+ oi = make_memop_idx(memop, idx);
298
+
299
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
300
+ swap = tcg_temp_ebb_new_i64();
301
+ switch (memop & MO_SIZE) {
302
+ case MO_16:
303
+ tcg_gen_bswap16_i64(swap, val, 0);
304
+ break;
305
+ case MO_32:
306
+ tcg_gen_bswap32_i64(swap, val, 0);
307
+ break;
308
+ case MO_64:
309
+ tcg_gen_bswap64_i64(swap, val);
310
+ break;
311
+ default:
312
+ g_assert_not_reached();
313
+ }
314
+ val = swap;
315
+ memop &= ~MO_BSWAP;
316
+ }
317
+
318
+ addr = plugin_prep_mem_callbacks(addr);
319
+ gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
320
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
321
+
322
+ if (swap) {
323
+ tcg_temp_free_i64(swap);
324
+ }
325
+}
326
+
327
+/*
328
+ * Return true if @mop, without knowledge of the pointer alignment,
329
+ * does not require 16-byte atomicity, and it would be advantageous
330
+ * to avoid a call to a helper function.
331
+ */
332
+static bool use_two_i64_for_i128(MemOp mop)
333
+{
334
+#ifdef CONFIG_SOFTMMU
335
+ /* Two softmmu tlb lookups is larger than one function call. */
336
+ return false;
337
+#else
338
+ /*
339
+ * For user-only, two 64-bit operations may well be smaller than a call.
340
+ * Determine if that would be legal for the requested atomicity.
341
+ */
342
+ switch (mop & MO_ATOM_MASK) {
343
+ case MO_ATOM_NONE:
344
+ case MO_ATOM_IFALIGN_PAIR:
345
+ return true;
346
+ case MO_ATOM_IFALIGN:
347
+ case MO_ATOM_SUBALIGN:
348
+ case MO_ATOM_WITHIN16:
349
+ case MO_ATOM_WITHIN16_PAIR:
350
+ /* In a serialized context, no atomicity is required. */
351
+ return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
352
+ default:
353
+ g_assert_not_reached();
354
+ }
355
+#endif
356
+}
357
+
358
+static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
359
+{
360
+ MemOp mop_1 = orig, mop_2;
361
+
362
+ tcg_debug_assert((orig & MO_SIZE) == MO_128);
363
+ tcg_debug_assert((orig & MO_SIGN) == 0);
364
+
365
+ /* Reduce the size to 64-bit. */
366
+ mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
367
+
368
+ /* Retain the alignment constraints of the original. */
369
+ switch (orig & MO_AMASK) {
370
+ case MO_UNALN:
371
+ case MO_ALIGN_2:
372
+ case MO_ALIGN_4:
373
+ mop_2 = mop_1;
374
+ break;
375
+ case MO_ALIGN_8:
376
+ /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
377
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
378
+ mop_2 = mop_1;
379
+ break;
380
+ case MO_ALIGN:
381
+ /* Second has 8-byte alignment; first has 16-byte alignment. */
382
+ mop_2 = mop_1;
383
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
384
+ break;
385
+ case MO_ALIGN_16:
386
+ case MO_ALIGN_32:
387
+ case MO_ALIGN_64:
388
+ /* Second has 8-byte alignment; first retains original. */
389
+ mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
390
+ break;
391
+ default:
392
+ g_assert_not_reached();
393
+ }
394
+
395
+ /* Use a memory ordering implemented by the host. */
396
+ if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
397
+ mop_1 &= ~MO_BSWAP;
398
+ mop_2 &= ~MO_BSWAP;
399
+ }
400
+
401
+ ret[0] = mop_1;
402
+ ret[1] = mop_2;
403
+}
404
+
405
+#if TARGET_LONG_BITS == 64
406
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
407
+#else
408
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
409
+#endif
410
+
411
+void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
412
+{
413
+ const MemOpIdx oi = make_memop_idx(memop, idx);
414
+
415
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
416
+ tcg_debug_assert((memop & MO_SIGN) == 0);
417
+
418
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
419
+ addr = plugin_prep_mem_callbacks(addr);
420
+
421
+ /* TODO: For now, force 32-bit hosts to use the helper. */
422
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
423
+ TCGv_i64 lo, hi;
424
+ TCGArg addr_arg;
425
+ MemOpIdx adj_oi;
426
+ bool need_bswap = false;
427
+
428
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
429
+ lo = TCGV128_HIGH(val);
430
+ hi = TCGV128_LOW(val);
431
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
432
+ need_bswap = true;
433
+ } else {
434
+ lo = TCGV128_LOW(val);
435
+ hi = TCGV128_HIGH(val);
436
+ adj_oi = oi;
437
+ }
438
+
439
+#if TARGET_LONG_BITS == 32
440
+ addr_arg = tcgv_i32_arg(addr);
441
+#else
442
+ addr_arg = tcgv_i64_arg(addr);
443
+#endif
444
+ tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
445
+
446
+ if (need_bswap) {
447
+ tcg_gen_bswap64_i64(lo, lo);
448
+ tcg_gen_bswap64_i64(hi, hi);
449
+ }
450
+ } else if (use_two_i64_for_i128(memop)) {
451
+ MemOp mop[2];
452
+ TCGv addr_p8;
453
+ TCGv_i64 x, y;
454
+
455
+ canonicalize_memop_i128_as_i64(mop, memop);
456
+
457
+ /*
458
+ * Since there are no global TCGv_i128, there is no visible state
459
+ * changed if the second load faults. Load directly into the two
460
+ * subwords.
461
+ */
462
+ if ((memop & MO_BSWAP) == MO_LE) {
463
+ x = TCGV128_LOW(val);
464
+ y = TCGV128_HIGH(val);
465
+ } else {
466
+ x = TCGV128_HIGH(val);
467
+ y = TCGV128_LOW(val);
468
+ }
469
+
470
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
471
+
472
+ if ((mop[0] ^ memop) & MO_BSWAP) {
473
+ tcg_gen_bswap64_i64(x, x);
474
+ }
475
+
476
+ addr_p8 = tcg_temp_ebb_new();
477
+ tcg_gen_addi_tl(addr_p8, addr, 8);
478
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
479
+ tcg_temp_free(addr_p8);
480
+
481
+ if ((mop[0] ^ memop) & MO_BSWAP) {
482
+ tcg_gen_bswap64_i64(y, y);
483
+ }
484
+ } else {
485
+ gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
486
+ }
487
+
488
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
489
+}
490
+
491
+void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
492
+{
493
+ const MemOpIdx oi = make_memop_idx(memop, idx);
494
+
495
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
496
+ tcg_debug_assert((memop & MO_SIGN) == 0);
497
+
498
+ tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
499
+ addr = plugin_prep_mem_callbacks(addr);
500
+
501
+ /* TODO: For now, force 32-bit hosts to use the helper. */
502
+
503
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
504
+ TCGv_i64 lo, hi;
505
+ TCGArg addr_arg;
506
+ MemOpIdx adj_oi;
507
+ bool need_bswap = false;
508
+
509
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
510
+ lo = tcg_temp_new_i64();
511
+ hi = tcg_temp_new_i64();
512
+ tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
513
+ tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
514
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
515
+ need_bswap = true;
516
+ } else {
517
+ lo = TCGV128_LOW(val);
518
+ hi = TCGV128_HIGH(val);
519
+ adj_oi = oi;
520
+ }
521
+
522
+#if TARGET_LONG_BITS == 32
523
+ addr_arg = tcgv_i32_arg(addr);
524
+#else
525
+ addr_arg = tcgv_i64_arg(addr);
526
+#endif
527
+ tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
528
+
529
+ if (need_bswap) {
530
+ tcg_temp_free_i64(lo);
531
+ tcg_temp_free_i64(hi);
532
+ }
533
+ } else if (use_two_i64_for_i128(memop)) {
534
+ MemOp mop[2];
535
+ TCGv addr_p8;
536
+ TCGv_i64 x, y;
537
+
538
+ canonicalize_memop_i128_as_i64(mop, memop);
539
+
540
+ if ((memop & MO_BSWAP) == MO_LE) {
541
+ x = TCGV128_LOW(val);
542
+ y = TCGV128_HIGH(val);
543
+ } else {
544
+ x = TCGV128_HIGH(val);
545
+ y = TCGV128_LOW(val);
546
+ }
547
+
548
+ addr_p8 = tcg_temp_ebb_new();
549
+ if ((mop[0] ^ memop) & MO_BSWAP) {
550
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
551
+
552
+ tcg_gen_bswap64_i64(t, x);
553
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
554
+ tcg_gen_bswap64_i64(t, y);
555
+ tcg_gen_addi_tl(addr_p8, addr, 8);
556
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
557
+ tcg_temp_free_i64(t);
558
+ } else {
559
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
560
+ tcg_gen_addi_tl(addr_p8, addr, 8);
561
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
562
+ }
563
+ tcg_temp_free(addr_p8);
564
+ } else {
565
+ gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
566
+ }
567
+
568
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
569
+}
570
+
571
+static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
572
+{
573
+ switch (opc & MO_SSIZE) {
574
+ case MO_SB:
575
+ tcg_gen_ext8s_i32(ret, val);
576
+ break;
577
+ case MO_UB:
578
+ tcg_gen_ext8u_i32(ret, val);
579
+ break;
580
+ case MO_SW:
581
+ tcg_gen_ext16s_i32(ret, val);
582
+ break;
583
+ case MO_UW:
584
+ tcg_gen_ext16u_i32(ret, val);
585
+ break;
586
+ default:
587
+ tcg_gen_mov_i32(ret, val);
588
+ break;
589
+ }
590
+}
591
+
592
+static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
593
+{
594
+ switch (opc & MO_SSIZE) {
595
+ case MO_SB:
596
+ tcg_gen_ext8s_i64(ret, val);
597
+ break;
598
+ case MO_UB:
599
+ tcg_gen_ext8u_i64(ret, val);
600
+ break;
601
+ case MO_SW:
602
+ tcg_gen_ext16s_i64(ret, val);
603
+ break;
604
+ case MO_UW:
605
+ tcg_gen_ext16u_i64(ret, val);
606
+ break;
607
+ case MO_SL:
608
+ tcg_gen_ext32s_i64(ret, val);
609
+ break;
610
+ case MO_UL:
611
+ tcg_gen_ext32u_i64(ret, val);
612
+ break;
613
+ default:
614
+ tcg_gen_mov_i64(ret, val);
615
+ break;
616
+ }
617
+}
618
+
619
+typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
620
+ TCGv_i32, TCGv_i32, TCGv_i32);
621
+typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
622
+ TCGv_i64, TCGv_i64, TCGv_i32);
623
+typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
624
+ TCGv_i128, TCGv_i128, TCGv_i32);
625
+typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
626
+ TCGv_i32, TCGv_i32);
627
+typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
628
+ TCGv_i64, TCGv_i32);
629
+
630
+#ifdef CONFIG_ATOMIC64
631
+# define WITH_ATOMIC64(X) X,
632
+#else
633
+# define WITH_ATOMIC64(X)
634
+#endif
635
+#ifdef CONFIG_CMPXCHG128
636
+# define WITH_ATOMIC128(X) X,
637
+#else
638
+# define WITH_ATOMIC128(X)
639
+#endif
640
+
641
+static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
642
+ [MO_8] = gen_helper_atomic_cmpxchgb,
643
+ [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
644
+ [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
645
+ [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
646
+ [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
647
+ WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
648
+ WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
649
+ WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
650
+ WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
651
+};
652
+
653
+void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
654
+ TCGv_i32 newv, TCGArg idx, MemOp memop)
655
+{
656
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
657
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
658
+
659
+ tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
660
+
661
+ tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
662
+ tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
663
+ tcg_gen_qemu_st_i32(t2, addr, idx, memop);
664
+ tcg_temp_free_i32(t2);
665
+
666
+ if (memop & MO_SIGN) {
667
+ tcg_gen_ext_i32(retv, t1, memop);
668
+ } else {
669
+ tcg_gen_mov_i32(retv, t1);
670
+ }
671
+ tcg_temp_free_i32(t1);
672
+}
673
+
674
+void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
675
+ TCGv_i32 newv, TCGArg idx, MemOp memop)
676
+{
677
+ gen_atomic_cx_i32 gen;
678
+ MemOpIdx oi;
679
+
680
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
681
+ tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
682
+ return;
683
+ }
684
+
685
+ memop = tcg_canonicalize_memop(memop, 0, 0);
686
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
687
+ tcg_debug_assert(gen != NULL);
688
+
689
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
690
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
691
+
692
+ if (memop & MO_SIGN) {
693
+ tcg_gen_ext_i32(retv, retv, memop);
694
+ }
695
+}
696
+
697
+void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
698
+ TCGv_i64 newv, TCGArg idx, MemOp memop)
699
+{
700
+ TCGv_i64 t1, t2;
701
+
702
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
703
+ tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
704
+ TCGV_LOW(newv), idx, memop);
705
+ if (memop & MO_SIGN) {
706
+ tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
707
+ } else {
708
+ tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
709
+ }
710
+ return;
711
+ }
712
+
713
+ t1 = tcg_temp_ebb_new_i64();
714
+ t2 = tcg_temp_ebb_new_i64();
715
+
716
+ tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
717
+
718
+ tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
719
+ tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
720
+ tcg_gen_qemu_st_i64(t2, addr, idx, memop);
721
+ tcg_temp_free_i64(t2);
722
+
723
+ if (memop & MO_SIGN) {
724
+ tcg_gen_ext_i64(retv, t1, memop);
725
+ } else {
726
+ tcg_gen_mov_i64(retv, t1);
727
+ }
728
+ tcg_temp_free_i64(t1);
729
+}
730
+
731
+void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
732
+ TCGv_i64 newv, TCGArg idx, MemOp memop)
733
+{
734
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
735
+ tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
736
+ return;
737
+ }
738
+
739
+ if ((memop & MO_SIZE) == MO_64) {
740
+ gen_atomic_cx_i64 gen;
741
+
742
+ memop = tcg_canonicalize_memop(memop, 1, 0);
743
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
744
+ if (gen) {
745
+ MemOpIdx oi = make_memop_idx(memop, idx);
746
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
747
+ return;
748
+ }
749
+
750
+ gen_helper_exit_atomic(cpu_env);
751
+
752
+ /*
753
+ * Produce a result for a well-formed opcode stream. This satisfies
754
+ * liveness for set before used, which happens before this dead code
755
+ * is removed.
756
+ */
757
+ tcg_gen_movi_i64(retv, 0);
758
+ return;
759
+ }
760
+
761
+ if (TCG_TARGET_REG_BITS == 32) {
762
+ tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
763
+ TCGV_LOW(newv), idx, memop);
764
+ if (memop & MO_SIGN) {
765
+ tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
766
+ } else {
767
+ tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
768
+ }
769
+ } else {
770
+ TCGv_i32 c32 = tcg_temp_ebb_new_i32();
771
+ TCGv_i32 n32 = tcg_temp_ebb_new_i32();
772
+ TCGv_i32 r32 = tcg_temp_ebb_new_i32();
773
+
774
+ tcg_gen_extrl_i64_i32(c32, cmpv);
775
+ tcg_gen_extrl_i64_i32(n32, newv);
776
+ tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
777
+ tcg_temp_free_i32(c32);
778
+ tcg_temp_free_i32(n32);
779
+
780
+ tcg_gen_extu_i32_i64(retv, r32);
781
+ tcg_temp_free_i32(r32);
782
+
783
+ if (memop & MO_SIGN) {
784
+ tcg_gen_ext_i64(retv, retv, memop);
785
+ }
786
+ }
787
+}
788
+
789
+void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
790
+ TCGv_i128 newv, TCGArg idx, MemOp memop)
791
+{
792
+ if (TCG_TARGET_REG_BITS == 32) {
793
+ /* Inline expansion below is simply too large for 32-bit hosts. */
794
+ gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
795
+ ? gen_helper_nonatomic_cmpxchgo_le
796
+ : gen_helper_nonatomic_cmpxchgo_be);
797
+ MemOpIdx oi = make_memop_idx(memop, idx);
798
+
799
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
800
+ tcg_debug_assert((memop & MO_SIGN) == 0);
801
+
802
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
803
+ } else {
804
+ TCGv_i128 oldv = tcg_temp_ebb_new_i128();
805
+ TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
806
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
807
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
808
+ TCGv_i64 z = tcg_constant_i64(0);
809
+
810
+ tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
811
+
812
+ /* Compare i128 */
813
+ tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
814
+ tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
815
+ tcg_gen_or_i64(t0, t0, t1);
816
+
817
+ /* tmpv = equal ? newv : oldv */
818
+ tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
819
+ TCGV128_LOW(newv), TCGV128_LOW(oldv));
820
+ tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
821
+ TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
822
+
823
+ /* Unconditional writeback. */
824
+ tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
825
+ tcg_gen_mov_i128(retv, oldv);
826
+
827
+ tcg_temp_free_i64(t0);
828
+ tcg_temp_free_i64(t1);
829
+ tcg_temp_free_i128(tmpv);
830
+ tcg_temp_free_i128(oldv);
831
+ }
832
+}
833
+
834
+void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
835
+ TCGv_i128 newv, TCGArg idx, MemOp memop)
836
+{
837
+ gen_atomic_cx_i128 gen;
838
+
839
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
840
+ tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
841
+ return;
842
+ }
843
+
844
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
845
+ tcg_debug_assert((memop & MO_SIGN) == 0);
846
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
847
+
848
+ if (gen) {
849
+ MemOpIdx oi = make_memop_idx(memop, idx);
850
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
851
+ return;
852
+ }
853
+
854
+ gen_helper_exit_atomic(cpu_env);
855
+
856
+ /*
857
+ * Produce a result for a well-formed opcode stream. This satisfies
858
+ * liveness for set before used, which happens before this dead code
859
+ * is removed.
860
+ */
861
+ tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
862
+ tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
863
+}
864
+
865
+static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
866
+ TCGArg idx, MemOp memop, bool new_val,
867
+ void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
868
+{
869
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
870
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
871
+
872
+ memop = tcg_canonicalize_memop(memop, 0, 0);
873
+
874
+ tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
875
+ tcg_gen_ext_i32(t2, val, memop);
876
+ gen(t2, t1, t2);
877
+ tcg_gen_qemu_st_i32(t2, addr, idx, memop);
878
+
879
+ tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
880
+ tcg_temp_free_i32(t1);
881
+ tcg_temp_free_i32(t2);
882
+}
883
+
884
+static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
885
+ TCGArg idx, MemOp memop, void * const table[])
886
+{
887
+ gen_atomic_op_i32 gen;
888
+ MemOpIdx oi;
889
+
890
+ memop = tcg_canonicalize_memop(memop, 0, 0);
891
+
892
+ gen = table[memop & (MO_SIZE | MO_BSWAP)];
893
+ tcg_debug_assert(gen != NULL);
894
+
895
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
896
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
897
+
898
+ if (memop & MO_SIGN) {
899
+ tcg_gen_ext_i32(ret, ret, memop);
900
+ }
901
+}
902
+
903
+static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
904
+ TCGArg idx, MemOp memop, bool new_val,
905
+ void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
906
+{
907
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
908
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
909
+
910
+ memop = tcg_canonicalize_memop(memop, 1, 0);
911
+
912
+ tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
913
+ tcg_gen_ext_i64(t2, val, memop);
914
+ gen(t2, t1, t2);
915
+ tcg_gen_qemu_st_i64(t2, addr, idx, memop);
916
+
917
+ tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
918
+ tcg_temp_free_i64(t1);
919
+ tcg_temp_free_i64(t2);
920
+}
921
+
922
+static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
923
+ TCGArg idx, MemOp memop, void * const table[])
924
+{
925
+ memop = tcg_canonicalize_memop(memop, 1, 0);
926
+
927
+ if ((memop & MO_SIZE) == MO_64) {
928
+#ifdef CONFIG_ATOMIC64
929
+ gen_atomic_op_i64 gen;
930
+ MemOpIdx oi;
931
+
932
+ gen = table[memop & (MO_SIZE | MO_BSWAP)];
933
+ tcg_debug_assert(gen != NULL);
934
+
935
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
936
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
937
+#else
938
+ gen_helper_exit_atomic(cpu_env);
939
+ /* Produce a result, so that we have a well-formed opcode stream
940
+ with respect to uses of the result in the (dead) code following. */
941
+ tcg_gen_movi_i64(ret, 0);
942
+#endif /* CONFIG_ATOMIC64 */
943
+ } else {
944
+ TCGv_i32 v32 = tcg_temp_ebb_new_i32();
945
+ TCGv_i32 r32 = tcg_temp_ebb_new_i32();
946
+
947
+ tcg_gen_extrl_i64_i32(v32, val);
948
+ do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
949
+ tcg_temp_free_i32(v32);
950
+
951
+ tcg_gen_extu_i32_i64(ret, r32);
952
+ tcg_temp_free_i32(r32);
953
+
954
+ if (memop & MO_SIGN) {
955
+ tcg_gen_ext_i64(ret, ret, memop);
956
+ }
957
+ }
958
+}
959
+
960
+#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
961
+static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
962
+ [MO_8] = gen_helper_atomic_##NAME##b, \
963
+ [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
964
+ [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
965
+ [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \
966
+ [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \
967
+ WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
968
+ WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
969
+}; \
970
+void tcg_gen_atomic_##NAME##_i32 \
971
+ (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
972
+{ \
973
+ if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
974
+ do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
975
+ } else { \
976
+ do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \
977
+ tcg_gen_##OP##_i32); \
978
+ } \
979
+} \
980
+void tcg_gen_atomic_##NAME##_i64 \
981
+ (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
982
+{ \
983
+ if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
984
+ do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
985
+ } else { \
986
+ do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \
987
+ tcg_gen_##OP##_i64); \
988
+ } \
989
+}
990
+
991
+GEN_ATOMIC_HELPER(fetch_add, add, 0)
992
+GEN_ATOMIC_HELPER(fetch_and, and, 0)
993
+GEN_ATOMIC_HELPER(fetch_or, or, 0)
994
+GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
995
+GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
996
+GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
997
+GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
998
+GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
999
+
1000
+GEN_ATOMIC_HELPER(add_fetch, add, 1)
1001
+GEN_ATOMIC_HELPER(and_fetch, and, 1)
1002
+GEN_ATOMIC_HELPER(or_fetch, or, 1)
1003
+GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
1004
+GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
1005
+GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
1006
+GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
1007
+GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
1008
+
1009
+static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
1010
+{
1011
+ tcg_gen_mov_i32(r, b);
1012
+}
1013
+
1014
+static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
1015
+{
1016
+ tcg_gen_mov_i64(r, b);
1017
+}
1018
+
1019
+GEN_ATOMIC_HELPER(xchg, mov2, 0)
1020
+
1021
+#undef GEN_ATOMIC_HELPER
1022
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
1023
index XXXXXXX..XXXXXXX 100644
1024
--- a/tcg/tcg-op.c
1025
+++ b/tcg/tcg-op.c
1026
@@ -XXX,XX +XXX,XX @@
1027
#include "tcg/tcg.h"
1028
#include "tcg/tcg-temp-internal.h"
1029
#include "tcg/tcg-op.h"
1030
-#include "tcg/tcg-mo.h"
1031
#include "exec/plugin-gen.h"
1032
#include "tcg-internal.h"
1033
1034
@@ -XXX,XX +XXX,XX @@ void tcg_gen_lookup_and_goto_ptr(void)
1035
tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
1036
tcg_temp_free_ptr(ptr);
1037
}
1038
-
1039
-static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
1040
-{
1041
- /* Trigger the asserts within as early as possible. */
1042
- unsigned a_bits = get_alignment_bits(op);
1043
-
1044
- /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
1045
- if (a_bits == (op & MO_SIZE)) {
1046
- op = (op & ~MO_AMASK) | MO_ALIGN;
1047
- }
1048
-
1049
- switch (op & MO_SIZE) {
1050
- case MO_8:
1051
- op &= ~MO_BSWAP;
1052
- break;
1053
- case MO_16:
1054
- break;
1055
- case MO_32:
1056
- if (!is64) {
1057
- op &= ~MO_SIGN;
1058
- }
1059
- break;
1060
- case MO_64:
1061
- if (is64) {
1062
- op &= ~MO_SIGN;
1063
- break;
1064
- }
1065
- /* fall through */
1066
- default:
1067
- g_assert_not_reached();
1068
- }
1069
- if (st) {
1070
- op &= ~MO_SIGN;
1071
- }
1072
- return op;
1073
-}
1074
-
1075
-static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
1076
- MemOp memop, TCGArg idx)
1077
-{
1078
- MemOpIdx oi = make_memop_idx(memop, idx);
1079
-#if TARGET_LONG_BITS == 32
1080
- tcg_gen_op3i_i32(opc, val, addr, oi);
1081
-#else
1082
- if (TCG_TARGET_REG_BITS == 32) {
1083
- tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
1084
- } else {
1085
- tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
1086
- }
1087
-#endif
1088
-}
1089
-
1090
-static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
1091
- MemOp memop, TCGArg idx)
1092
-{
1093
- MemOpIdx oi = make_memop_idx(memop, idx);
1094
-#if TARGET_LONG_BITS == 32
1095
- if (TCG_TARGET_REG_BITS == 32) {
1096
- tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
1097
- } else {
1098
- tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
1099
- }
1100
-#else
1101
- if (TCG_TARGET_REG_BITS == 32) {
1102
- tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
1103
- TCGV_LOW(addr), TCGV_HIGH(addr), oi);
1104
- } else {
1105
- tcg_gen_op3i_i64(opc, val, addr, oi);
1106
- }
1107
-#endif
1108
-}
1109
-
1110
-static void tcg_gen_req_mo(TCGBar type)
1111
-{
1112
-#ifdef TCG_GUEST_DEFAULT_MO
1113
- type &= TCG_GUEST_DEFAULT_MO;
1114
-#endif
1115
- type &= ~TCG_TARGET_DEFAULT_MO;
1116
- if (type) {
1117
- tcg_gen_mb(type | TCG_BAR_SC);
1118
- }
1119
-}
1120
-
1121
-static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
1122
-{
1123
-#ifdef CONFIG_PLUGIN
1124
- if (tcg_ctx->plugin_insn != NULL) {
1125
- /* Save a copy of the vaddr for use after a load. */
1126
- TCGv temp = tcg_temp_new();
1127
- tcg_gen_mov_tl(temp, vaddr);
1128
- return temp;
1129
- }
1130
-#endif
1131
- return vaddr;
1132
-}
1133
-
1134
-static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
1135
- enum qemu_plugin_mem_rw rw)
1136
-{
1137
-#ifdef CONFIG_PLUGIN
1138
- if (tcg_ctx->plugin_insn != NULL) {
1139
- qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
1140
- plugin_gen_empty_mem_callback(vaddr, info);
1141
- tcg_temp_free(vaddr);
1142
- }
1143
-#endif
1144
-}
1145
-
1146
-void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
1147
-{
1148
- MemOp orig_memop;
1149
- MemOpIdx oi;
1150
-
1151
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1152
- memop = tcg_canonicalize_memop(memop, 0, 0);
1153
- oi = make_memop_idx(memop, idx);
1154
-
1155
- orig_memop = memop;
1156
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1157
- memop &= ~MO_BSWAP;
1158
- /* The bswap primitive benefits from zero-extended input. */
1159
- if ((memop & MO_SSIZE) == MO_SW) {
1160
- memop &= ~MO_SIGN;
1161
- }
1162
- }
1163
-
1164
- addr = plugin_prep_mem_callbacks(addr);
1165
- gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
1166
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
1167
-
1168
- if ((orig_memop ^ memop) & MO_BSWAP) {
1169
- switch (orig_memop & MO_SIZE) {
1170
- case MO_16:
1171
- tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
1172
- ? TCG_BSWAP_IZ | TCG_BSWAP_OS
1173
- : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
1174
- break;
1175
- case MO_32:
1176
- tcg_gen_bswap32_i32(val, val);
1177
- break;
1178
- default:
1179
- g_assert_not_reached();
1180
- }
1181
- }
1182
-}
1183
-
1184
-void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
1185
-{
1186
- TCGv_i32 swap = NULL;
1187
- MemOpIdx oi;
1188
-
1189
- tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1190
- memop = tcg_canonicalize_memop(memop, 0, 1);
1191
- oi = make_memop_idx(memop, idx);
1192
-
1193
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1194
- swap = tcg_temp_ebb_new_i32();
1195
- switch (memop & MO_SIZE) {
1196
- case MO_16:
1197
- tcg_gen_bswap16_i32(swap, val, 0);
1198
- break;
1199
- case MO_32:
1200
- tcg_gen_bswap32_i32(swap, val);
1201
- break;
1202
- default:
1203
- g_assert_not_reached();
1204
- }
1205
- val = swap;
1206
- memop &= ~MO_BSWAP;
1207
- }
1208
-
1209
- addr = plugin_prep_mem_callbacks(addr);
1210
- if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
1211
- gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
1212
- } else {
1213
- gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
1214
- }
1215
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
1216
-
1217
- if (swap) {
1218
- tcg_temp_free_i32(swap);
1219
- }
1220
-}
1221
-
1222
-void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
1223
-{
1224
- MemOp orig_memop;
1225
- MemOpIdx oi;
1226
-
1227
- if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
1228
- tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
1229
- if (memop & MO_SIGN) {
1230
- tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
1231
- } else {
1232
- tcg_gen_movi_i32(TCGV_HIGH(val), 0);
1233
- }
1234
- return;
1235
- }
1236
-
1237
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1238
- memop = tcg_canonicalize_memop(memop, 1, 0);
1239
- oi = make_memop_idx(memop, idx);
1240
-
1241
- orig_memop = memop;
1242
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1243
- memop &= ~MO_BSWAP;
1244
- /* The bswap primitive benefits from zero-extended input. */
1245
- if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
1246
- memop &= ~MO_SIGN;
1247
- }
1248
- }
1249
-
1250
- addr = plugin_prep_mem_callbacks(addr);
1251
- gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
1252
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
1253
-
1254
- if ((orig_memop ^ memop) & MO_BSWAP) {
1255
- int flags = (orig_memop & MO_SIGN
1256
- ? TCG_BSWAP_IZ | TCG_BSWAP_OS
1257
- : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
1258
- switch (orig_memop & MO_SIZE) {
1259
- case MO_16:
1260
- tcg_gen_bswap16_i64(val, val, flags);
1261
- break;
1262
- case MO_32:
1263
- tcg_gen_bswap32_i64(val, val, flags);
1264
- break;
1265
- case MO_64:
1266
- tcg_gen_bswap64_i64(val, val);
1267
- break;
1268
- default:
1269
- g_assert_not_reached();
1270
- }
1271
- }
1272
-}
1273
-
1274
-void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
1275
-{
1276
- TCGv_i64 swap = NULL;
1277
- MemOpIdx oi;
1278
-
1279
- if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
1280
- tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
1281
- return;
1282
- }
1283
-
1284
- tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1285
- memop = tcg_canonicalize_memop(memop, 1, 1);
1286
- oi = make_memop_idx(memop, idx);
1287
-
1288
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1289
- swap = tcg_temp_ebb_new_i64();
1290
- switch (memop & MO_SIZE) {
1291
- case MO_16:
1292
- tcg_gen_bswap16_i64(swap, val, 0);
1293
- break;
1294
- case MO_32:
1295
- tcg_gen_bswap32_i64(swap, val, 0);
1296
- break;
1297
- case MO_64:
1298
- tcg_gen_bswap64_i64(swap, val);
1299
- break;
1300
- default:
1301
- g_assert_not_reached();
1302
- }
1303
- val = swap;
1304
- memop &= ~MO_BSWAP;
1305
- }
1306
-
1307
- addr = plugin_prep_mem_callbacks(addr);
1308
- gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
1309
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
1310
-
1311
- if (swap) {
1312
- tcg_temp_free_i64(swap);
1313
- }
1314
-}
1315
-
1316
-/*
1317
- * Return true if @mop, without knowledge of the pointer alignment,
1318
- * does not require 16-byte atomicity, and it would be adventagous
1319
- * to avoid a call to a helper function.
1320
- */
1321
-static bool use_two_i64_for_i128(MemOp mop)
1322
-{
1323
-#ifdef CONFIG_SOFTMMU
1324
- /* Two softmmu tlb lookups is larger than one function call. */
1325
- return false;
1326
-#else
1327
- /*
1328
- * For user-only, two 64-bit operations may well be smaller than a call.
1329
- * Determine if that would be legal for the requested atomicity.
1330
- */
1331
- switch (mop & MO_ATOM_MASK) {
1332
- case MO_ATOM_NONE:
1333
- case MO_ATOM_IFALIGN_PAIR:
1334
- return true;
1335
- case MO_ATOM_IFALIGN:
1336
- case MO_ATOM_SUBALIGN:
1337
- case MO_ATOM_WITHIN16:
1338
- case MO_ATOM_WITHIN16_PAIR:
1339
- /* In a serialized context, no atomicity is required. */
1340
- return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
1341
- default:
1342
- g_assert_not_reached();
1343
- }
1344
-#endif
1345
-}
1346
-
1347
-static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
1348
-{
1349
- MemOp mop_1 = orig, mop_2;
1350
-
1351
- tcg_debug_assert((orig & MO_SIZE) == MO_128);
1352
- tcg_debug_assert((orig & MO_SIGN) == 0);
1353
-
1354
- /* Reduce the size to 64-bit. */
1355
- mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
1356
-
1357
- /* Retain the alignment constraints of the original. */
1358
- switch (orig & MO_AMASK) {
1359
- case MO_UNALN:
1360
- case MO_ALIGN_2:
1361
- case MO_ALIGN_4:
1362
- mop_2 = mop_1;
1363
- break;
1364
- case MO_ALIGN_8:
1365
- /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
1366
- mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
1367
- mop_2 = mop_1;
1368
- break;
1369
- case MO_ALIGN:
1370
- /* Second has 8-byte alignment; first has 16-byte alignment. */
1371
- mop_2 = mop_1;
1372
- mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
1373
- break;
1374
- case MO_ALIGN_16:
1375
- case MO_ALIGN_32:
1376
- case MO_ALIGN_64:
1377
- /* Second has 8-byte alignment; first retains original. */
1378
- mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
1379
- break;
1380
- default:
1381
- g_assert_not_reached();
1382
- }
1383
-
1384
- /* Use a memory ordering implemented by the host. */
1385
- if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
1386
- mop_1 &= ~MO_BSWAP;
1387
- mop_2 &= ~MO_BSWAP;
1388
- }
1389
-
1390
- ret[0] = mop_1;
1391
- ret[1] = mop_2;
1392
-}
1393
-
1394
-#if TARGET_LONG_BITS == 64
1395
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
1396
-#else
1397
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
1398
-#endif
1399
-
1400
-void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
1401
-{
1402
- const MemOpIdx oi = make_memop_idx(memop, idx);
1403
-
1404
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1405
- tcg_debug_assert((memop & MO_SIGN) == 0);
1406
-
1407
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1408
- addr = plugin_prep_mem_callbacks(addr);
1409
-
1410
- /* TODO: For now, force 32-bit hosts to use the helper. */
1411
- if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
1412
- TCGv_i64 lo, hi;
1413
- TCGArg addr_arg;
1414
- MemOpIdx adj_oi;
1415
- bool need_bswap = false;
1416
-
1417
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1418
- lo = TCGV128_HIGH(val);
1419
- hi = TCGV128_LOW(val);
1420
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
1421
- need_bswap = true;
1422
- } else {
1423
- lo = TCGV128_LOW(val);
1424
- hi = TCGV128_HIGH(val);
1425
- adj_oi = oi;
1426
- }
1427
-
1428
-#if TARGET_LONG_BITS == 32
1429
- addr_arg = tcgv_i32_arg(addr);
1430
-#else
1431
- addr_arg = tcgv_i64_arg(addr);
1432
-#endif
1433
- tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
1434
-
1435
- if (need_bswap) {
1436
- tcg_gen_bswap64_i64(lo, lo);
1437
- tcg_gen_bswap64_i64(hi, hi);
1438
- }
1439
- } else if (use_two_i64_for_i128(memop)) {
1440
- MemOp mop[2];
1441
- TCGv addr_p8;
1442
- TCGv_i64 x, y;
1443
-
1444
- canonicalize_memop_i128_as_i64(mop, memop);
1445
-
1446
- /*
1447
- * Since there are no global TCGv_i128, there is no visible state
1448
- * changed if the second load faults. Load directly into the two
1449
- * subwords.
1450
- */
1451
- if ((memop & MO_BSWAP) == MO_LE) {
1452
- x = TCGV128_LOW(val);
1453
- y = TCGV128_HIGH(val);
1454
- } else {
1455
- x = TCGV128_HIGH(val);
1456
- y = TCGV128_LOW(val);
1457
- }
1458
-
1459
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
1460
-
1461
- if ((mop[0] ^ memop) & MO_BSWAP) {
1462
- tcg_gen_bswap64_i64(x, x);
1463
- }
1464
-
1465
- addr_p8 = tcg_temp_ebb_new();
1466
- tcg_gen_addi_tl(addr_p8, addr, 8);
1467
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
1468
- tcg_temp_free(addr_p8);
1469
-
1470
- if ((mop[0] ^ memop) & MO_BSWAP) {
1471
- tcg_gen_bswap64_i64(y, y);
1472
- }
1473
- } else {
1474
- gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
1475
- }
1476
-
1477
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
1478
-}
1479
-
1480
-void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
1481
-{
1482
- const MemOpIdx oi = make_memop_idx(memop, idx);
1483
-
1484
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1485
- tcg_debug_assert((memop & MO_SIGN) == 0);
1486
-
1487
- tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
1488
- addr = plugin_prep_mem_callbacks(addr);
1489
-
1490
- /* TODO: For now, force 32-bit hosts to use the helper. */
1491
-
1492
- if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
1493
- TCGv_i64 lo, hi;
1494
- TCGArg addr_arg;
1495
- MemOpIdx adj_oi;
1496
- bool need_bswap = false;
1497
-
1498
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1499
- lo = tcg_temp_new_i64();
1500
- hi = tcg_temp_new_i64();
1501
- tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
1502
- tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
1503
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
1504
- need_bswap = true;
1505
- } else {
1506
- lo = TCGV128_LOW(val);
1507
- hi = TCGV128_HIGH(val);
1508
- adj_oi = oi;
1509
- }
1510
-
1511
-#if TARGET_LONG_BITS == 32
1512
- addr_arg = tcgv_i32_arg(addr);
1513
-#else
1514
- addr_arg = tcgv_i64_arg(addr);
1515
-#endif
1516
- tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
1517
-
1518
- if (need_bswap) {
1519
- tcg_temp_free_i64(lo);
1520
- tcg_temp_free_i64(hi);
1521
- }
1522
- } else if (use_two_i64_for_i128(memop)) {
1523
- MemOp mop[2];
1524
- TCGv addr_p8;
1525
- TCGv_i64 x, y;
1526
-
1527
- canonicalize_memop_i128_as_i64(mop, memop);
1528
-
1529
- if ((memop & MO_BSWAP) == MO_LE) {
1530
- x = TCGV128_LOW(val);
1531
- y = TCGV128_HIGH(val);
1532
- } else {
1533
- x = TCGV128_HIGH(val);
1534
- y = TCGV128_LOW(val);
1535
- }
1536
-
1537
- addr_p8 = tcg_temp_ebb_new();
1538
- if ((mop[0] ^ memop) & MO_BSWAP) {
1539
- TCGv_i64 t = tcg_temp_ebb_new_i64();
1540
-
1541
- tcg_gen_bswap64_i64(t, x);
1542
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
1543
- tcg_gen_bswap64_i64(t, y);
1544
- tcg_gen_addi_tl(addr_p8, addr, 8);
1545
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
1546
- tcg_temp_free_i64(t);
1547
- } else {
1548
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
1549
- tcg_gen_addi_tl(addr_p8, addr, 8);
1550
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
1551
- }
1552
- tcg_temp_free(addr_p8);
1553
- } else {
1554
- gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
1555
- }
1556
-
1557
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
1558
-}
1559
-
1560
-static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
1561
-{
1562
- switch (opc & MO_SSIZE) {
1563
- case MO_SB:
1564
- tcg_gen_ext8s_i32(ret, val);
1565
- break;
1566
- case MO_UB:
1567
- tcg_gen_ext8u_i32(ret, val);
1568
- break;
1569
- case MO_SW:
1570
- tcg_gen_ext16s_i32(ret, val);
1571
- break;
1572
- case MO_UW:
1573
- tcg_gen_ext16u_i32(ret, val);
1574
- break;
1575
- default:
1576
- tcg_gen_mov_i32(ret, val);
1577
- break;
1578
- }
1579
-}
1580
-
1581
-static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
1582
-{
1583
- switch (opc & MO_SSIZE) {
1584
- case MO_SB:
1585
- tcg_gen_ext8s_i64(ret, val);
1586
- break;
1587
- case MO_UB:
1588
- tcg_gen_ext8u_i64(ret, val);
1589
- break;
1590
- case MO_SW:
1591
- tcg_gen_ext16s_i64(ret, val);
1592
- break;
1593
- case MO_UW:
1594
- tcg_gen_ext16u_i64(ret, val);
1595
- break;
1596
- case MO_SL:
1597
- tcg_gen_ext32s_i64(ret, val);
1598
- break;
1599
- case MO_UL:
1600
- tcg_gen_ext32u_i64(ret, val);
1601
- break;
1602
- default:
1603
- tcg_gen_mov_i64(ret, val);
1604
- break;
1605
- }
1606
-}
1607
-
1608
-typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
1609
- TCGv_i32, TCGv_i32, TCGv_i32);
1610
-typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
1611
- TCGv_i64, TCGv_i64, TCGv_i32);
1612
-typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
1613
- TCGv_i128, TCGv_i128, TCGv_i32);
1614
-typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
1615
- TCGv_i32, TCGv_i32);
1616
-typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
1617
- TCGv_i64, TCGv_i32);
1618
-
1619
-#ifdef CONFIG_ATOMIC64
1620
-# define WITH_ATOMIC64(X) X,
1621
-#else
1622
-# define WITH_ATOMIC64(X)
1623
-#endif
1624
-#ifdef CONFIG_CMPXCHG128
1625
-# define WITH_ATOMIC128(X) X,
1626
-#else
1627
-# define WITH_ATOMIC128(X)
1628
-#endif
1629
-
1630
-static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
1631
- [MO_8] = gen_helper_atomic_cmpxchgb,
1632
- [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
1633
- [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
1634
- [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
1635
- [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
1636
- WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
1637
- WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
1638
- WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
1639
- WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
1640
-};
1641
-
1642
-void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
1643
- TCGv_i32 newv, TCGArg idx, MemOp memop)
1644
-{
1645
- TCGv_i32 t1 = tcg_temp_ebb_new_i32();
1646
- TCGv_i32 t2 = tcg_temp_ebb_new_i32();
1647
-
1648
- tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
1649
-
1650
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
1651
- tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
1652
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
1653
- tcg_temp_free_i32(t2);
1654
-
1655
- if (memop & MO_SIGN) {
1656
- tcg_gen_ext_i32(retv, t1, memop);
1657
- } else {
1658
- tcg_gen_mov_i32(retv, t1);
1659
- }
1660
- tcg_temp_free_i32(t1);
1661
-}
1662
-
1663
-void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
1664
- TCGv_i32 newv, TCGArg idx, MemOp memop)
1665
-{
1666
- gen_atomic_cx_i32 gen;
1667
- MemOpIdx oi;
1668
-
1669
- if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1670
- tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
1671
- return;
1672
- }
1673
-
1674
- memop = tcg_canonicalize_memop(memop, 0, 0);
1675
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1676
- tcg_debug_assert(gen != NULL);
1677
-
1678
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
1679
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1680
-
1681
- if (memop & MO_SIGN) {
1682
- tcg_gen_ext_i32(retv, retv, memop);
1683
- }
1684
-}
1685
-
1686
-void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
1687
- TCGv_i64 newv, TCGArg idx, MemOp memop)
1688
-{
1689
- TCGv_i64 t1, t2;
1690
-
1691
- if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
1692
- tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
1693
- TCGV_LOW(newv), idx, memop);
1694
- if (memop & MO_SIGN) {
1695
- tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
1696
- } else {
1697
- tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
1698
- }
1699
- return;
1700
- }
1701
-
1702
- t1 = tcg_temp_ebb_new_i64();
1703
- t2 = tcg_temp_ebb_new_i64();
1704
-
1705
- tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
1706
-
1707
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
1708
- tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
1709
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
1710
- tcg_temp_free_i64(t2);
1711
-
1712
- if (memop & MO_SIGN) {
1713
- tcg_gen_ext_i64(retv, t1, memop);
1714
- } else {
1715
- tcg_gen_mov_i64(retv, t1);
1716
- }
1717
- tcg_temp_free_i64(t1);
1718
-}
1719
-
1720
-void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
1721
- TCGv_i64 newv, TCGArg idx, MemOp memop)
1722
-{
1723
- if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1724
- tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
1725
- return;
1726
- }
1727
-
1728
- if ((memop & MO_SIZE) == MO_64) {
1729
- gen_atomic_cx_i64 gen;
1730
-
1731
- memop = tcg_canonicalize_memop(memop, 1, 0);
1732
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1733
- if (gen) {
1734
- MemOpIdx oi = make_memop_idx(memop, idx);
1735
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1736
- return;
1737
- }
1738
-
1739
- gen_helper_exit_atomic(cpu_env);
1740
-
1741
- /*
1742
- * Produce a result for a well-formed opcode stream. This satisfies
1743
- * liveness for set before used, which happens before this dead code
1744
- * is removed.
1745
- */
1746
- tcg_gen_movi_i64(retv, 0);
1747
- return;
1748
- }
1749
-
1750
- if (TCG_TARGET_REG_BITS == 32) {
1751
- tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
1752
- TCGV_LOW(newv), idx, memop);
1753
- if (memop & MO_SIGN) {
1754
- tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
1755
- } else {
1756
- tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
1757
- }
1758
- } else {
1759
- TCGv_i32 c32 = tcg_temp_ebb_new_i32();
1760
- TCGv_i32 n32 = tcg_temp_ebb_new_i32();
1761
- TCGv_i32 r32 = tcg_temp_ebb_new_i32();
1762
-
1763
- tcg_gen_extrl_i64_i32(c32, cmpv);
1764
- tcg_gen_extrl_i64_i32(n32, newv);
1765
- tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
1766
- tcg_temp_free_i32(c32);
1767
- tcg_temp_free_i32(n32);
1768
-
1769
- tcg_gen_extu_i32_i64(retv, r32);
1770
- tcg_temp_free_i32(r32);
1771
-
1772
- if (memop & MO_SIGN) {
1773
- tcg_gen_ext_i64(retv, retv, memop);
1774
- }
1775
- }
1776
-}
1777
-
1778
-void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
1779
- TCGv_i128 newv, TCGArg idx, MemOp memop)
1780
-{
1781
- if (TCG_TARGET_REG_BITS == 32) {
1782
- /* Inline expansion below is simply too large for 32-bit hosts. */
1783
- gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
1784
- ? gen_helper_nonatomic_cmpxchgo_le
1785
- : gen_helper_nonatomic_cmpxchgo_be);
1786
- MemOpIdx oi = make_memop_idx(memop, idx);
1787
-
1788
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1789
- tcg_debug_assert((memop & MO_SIGN) == 0);
1790
-
1791
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1792
- } else {
1793
- TCGv_i128 oldv = tcg_temp_ebb_new_i128();
1794
- TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
1795
- TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1796
- TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1797
- TCGv_i64 z = tcg_constant_i64(0);
1798
-
1799
- tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
1800
-
1801
- /* Compare i128 */
1802
- tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
1803
- tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
1804
- tcg_gen_or_i64(t0, t0, t1);
1805
-
1806
- /* tmpv = equal ? newv : oldv */
1807
- tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
1808
- TCGV128_LOW(newv), TCGV128_LOW(oldv));
1809
- tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
1810
- TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
1811
-
1812
- /* Unconditional writeback. */
1813
- tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
1814
- tcg_gen_mov_i128(retv, oldv);
1815
-
1816
- tcg_temp_free_i64(t0);
1817
- tcg_temp_free_i64(t1);
1818
- tcg_temp_free_i128(tmpv);
1819
- tcg_temp_free_i128(oldv);
1820
- }
1821
-}
1822
-
1823
-void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
1824
- TCGv_i128 newv, TCGArg idx, MemOp memop)
1825
-{
1826
- gen_atomic_cx_i128 gen;
1827
-
1828
- if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1829
- tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
1830
- return;
1831
- }
1832
-
1833
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1834
- tcg_debug_assert((memop & MO_SIGN) == 0);
1835
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1836
-
1837
- if (gen) {
1838
- MemOpIdx oi = make_memop_idx(memop, idx);
1839
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1840
- return;
1841
- }
1842
-
1843
- gen_helper_exit_atomic(cpu_env);
1844
-
1845
- /*
1846
- * Produce a result for a well-formed opcode stream. This satisfies
1847
- * liveness for set before used, which happens before this dead code
1848
- * is removed.
1849
- */
1850
- tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
1851
- tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
1852
-}
1853
-
1854
-static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
1855
- TCGArg idx, MemOp memop, bool new_val,
1856
- void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
1857
-{
1858
- TCGv_i32 t1 = tcg_temp_ebb_new_i32();
1859
- TCGv_i32 t2 = tcg_temp_ebb_new_i32();
1860
-
1861
- memop = tcg_canonicalize_memop(memop, 0, 0);
1862
-
1863
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
1864
- tcg_gen_ext_i32(t2, val, memop);
1865
- gen(t2, t1, t2);
1866
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
1867
-
1868
- tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
1869
- tcg_temp_free_i32(t1);
1870
- tcg_temp_free_i32(t2);
1871
-}
1872
-
1873
-static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
1874
- TCGArg idx, MemOp memop, void * const table[])
1875
-{
1876
- gen_atomic_op_i32 gen;
1877
- MemOpIdx oi;
1878
-
1879
- memop = tcg_canonicalize_memop(memop, 0, 0);
1880
-
1881
- gen = table[memop & (MO_SIZE | MO_BSWAP)];
1882
- tcg_debug_assert(gen != NULL);
1883
-
1884
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
1885
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
1886
-
1887
- if (memop & MO_SIGN) {
1888
- tcg_gen_ext_i32(ret, ret, memop);
1889
- }
1890
-}
1891
-
1892
-static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
1893
- TCGArg idx, MemOp memop, bool new_val,
1894
- void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
1895
-{
1896
- TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1897
- TCGv_i64 t2 = tcg_temp_ebb_new_i64();
1898
-
1899
- memop = tcg_canonicalize_memop(memop, 1, 0);
1900
-
1901
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
1902
- tcg_gen_ext_i64(t2, val, memop);
1903
- gen(t2, t1, t2);
1904
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
1905
-
1906
- tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
1907
- tcg_temp_free_i64(t1);
1908
- tcg_temp_free_i64(t2);
1909
-}
1910
-
1911
-static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
1912
- TCGArg idx, MemOp memop, void * const table[])
1913
-{
1914
- memop = tcg_canonicalize_memop(memop, 1, 0);
1915
-
1916
- if ((memop & MO_SIZE) == MO_64) {
1917
-#ifdef CONFIG_ATOMIC64
1918
- gen_atomic_op_i64 gen;
1919
- MemOpIdx oi;
1920
-
1921
- gen = table[memop & (MO_SIZE | MO_BSWAP)];
1922
- tcg_debug_assert(gen != NULL);
1923
-
1924
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
1925
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
1926
-#else
1927
- gen_helper_exit_atomic(cpu_env);
1928
- /* Produce a result, so that we have a well-formed opcode stream
1929
- with respect to uses of the result in the (dead) code following. */
1930
- tcg_gen_movi_i64(ret, 0);
1931
-#endif /* CONFIG_ATOMIC64 */
1932
- } else {
1933
- TCGv_i32 v32 = tcg_temp_ebb_new_i32();
1934
- TCGv_i32 r32 = tcg_temp_ebb_new_i32();
1935
-
1936
- tcg_gen_extrl_i64_i32(v32, val);
1937
- do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
1938
- tcg_temp_free_i32(v32);
1939
-
1940
- tcg_gen_extu_i32_i64(ret, r32);
1941
- tcg_temp_free_i32(r32);
1942
-
1943
- if (memop & MO_SIGN) {
1944
- tcg_gen_ext_i64(ret, ret, memop);
1945
- }
1946
- }
1947
-}
1948
-
1949
-#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
1950
-static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
1951
- [MO_8] = gen_helper_atomic_##NAME##b, \
1952
- [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
1953
- [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
1954
- [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \
1955
- [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \
1956
- WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
1957
- WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
1958
-}; \
1959
-void tcg_gen_atomic_##NAME##_i32 \
1960
- (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
1961
-{ \
1962
- if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
1963
- do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
1964
- } else { \
1965
- do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \
1966
- tcg_gen_##OP##_i32); \
1967
- } \
1968
-} \
1969
-void tcg_gen_atomic_##NAME##_i64 \
1970
- (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
1971
-{ \
1972
- if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
1973
- do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
1974
- } else { \
1975
- do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \
1976
- tcg_gen_##OP##_i64); \
1977
- } \
1978
-}
1979
-
1980
-GEN_ATOMIC_HELPER(fetch_add, add, 0)
1981
-GEN_ATOMIC_HELPER(fetch_and, and, 0)
1982
-GEN_ATOMIC_HELPER(fetch_or, or, 0)
1983
-GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
1984
-GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
1985
-GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
1986
-GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
1987
-GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
1988
-
1989
-GEN_ATOMIC_HELPER(add_fetch, add, 1)
1990
-GEN_ATOMIC_HELPER(and_fetch, and, 1)
1991
-GEN_ATOMIC_HELPER(or_fetch, or, 1)
1992
-GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
1993
-GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
1994
-GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
1995
-GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
1996
-GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
1997
-
1998
-static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
1999
-{
2000
- tcg_gen_mov_i32(r, b);
2001
-}
2002
-
2003
-static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
2004
-{
2005
- tcg_gen_mov_i64(r, b);
2006
-}
2007
-
2008
-GEN_ATOMIC_HELPER(xchg, mov2, 0)
2009
-
2010
-#undef GEN_ATOMIC_HELPER
2011
diff --git a/tcg/meson.build b/tcg/meson.build
2012
index XXXXXXX..XXXXXXX 100644
2013
--- a/tcg/meson.build
2014
+++ b/tcg/meson.build
2015
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(files(
2016
'tcg.c',
2017
'tcg-common.c',
2018
'tcg-op.c',
2019
+ 'tcg-op-ldst.c',
2020
'tcg-op-gvec.c',
2021
'tcg-op-vec.c',
2022
))
2023
--
2024
2.34.1
2025
2026
New patch
1
1
We already pass uint64_t to restore_state_to_opc; this changes all
2
of the other uses from insn_start through the encoding to decoding.
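For reference, a self-contained sketch of the signed LEB128 round-trip used by the encode_search/decode path, widened to int64_t as in this patch. Illustrative only: it hard-codes a 64-bit width where QEMU checks TARGET_LONG_BITS, and it is not the exact translate-all.c code.

    #include <stdint.h>
    #include <assert.h>

    /* Encode VAL as signed LEB128 at P; relies on arithmetic right shift
     * for negative values.  Returns P past the encoded bytes. */
    static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
    {
        int more, byte;

        do {
            byte = val & 0x7f;
            val >>= 7;
            more = !((val == 0 && (byte & 0x40) == 0)
                     || (val == -1 && (byte & 0x40) != 0));
            if (more) {
                byte |= 0x80;
            }
            *p++ = byte;
        } while (more);
        return p;
    }

    /* Decode a signed LEB128 sequence at *PP; advance *PP past it. */
    static int64_t decode_sleb128(const uint8_t **pp)
    {
        const uint8_t *p = *pp;
        int64_t val = 0;
        int byte, shift = 0;

        do {
            byte = *p++;
            val |= (int64_t)(byte & 0x7f) << shift;
            shift += 7;
        } while (byte & 0x80);
        if (shift < 64 && (byte & 0x40)) {
            val |= -(int64_t)1 << shift;
        }
        *pp = p;
        return val;
    }

    int main(void)
    {
        uint8_t buf[16];
        const uint8_t *r = buf;

        encode_sleb128(buf, -0x123456789abLL);
        assert(decode_sleb128(&r) == -0x123456789abLL);
        return 0;
    }

The effect of the patch is that the encoded insn_start words are always 64-bit, independent of target_long.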
3
4
Reviewed-by: Anton Johansson <anjo@rev.ng>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
include/tcg/tcg-op.h | 39 +++++++++------------------------------
9
include/tcg/tcg-opc.h | 2 +-
10
include/tcg/tcg.h | 30 +++++++++++++++---------------
11
accel/tcg/translate-all.c | 28 ++++++++++++++++------------
12
tcg/tcg.c | 18 ++++--------------
13
5 files changed, 45 insertions(+), 72 deletions(-)
14
15
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/tcg/tcg-op.h
18
+++ b/include/tcg/tcg-op.h
19
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
20
#endif
21
22
#if TARGET_INSN_START_WORDS == 1
23
-# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
24
static inline void tcg_gen_insn_start(target_ulong pc)
25
{
26
- tcg_gen_op1(INDEX_op_insn_start, pc);
27
+ TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 64 / TCG_TARGET_REG_BITS);
28
+ tcg_set_insn_start_param(op, 0, pc);
29
}
30
-# else
31
-static inline void tcg_gen_insn_start(target_ulong pc)
32
-{
33
- tcg_gen_op2(INDEX_op_insn_start, (uint32_t)pc, (uint32_t)(pc >> 32));
34
-}
35
-# endif
36
#elif TARGET_INSN_START_WORDS == 2
37
-# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
38
static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
39
{
40
- tcg_gen_op2(INDEX_op_insn_start, pc, a1);
41
+ TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 2 * 64 / TCG_TARGET_REG_BITS);
42
+ tcg_set_insn_start_param(op, 0, pc);
43
+ tcg_set_insn_start_param(op, 1, a1);
44
}
45
-# else
46
-static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
47
-{
48
- tcg_gen_op4(INDEX_op_insn_start,
49
- (uint32_t)pc, (uint32_t)(pc >> 32),
50
- (uint32_t)a1, (uint32_t)(a1 >> 32));
51
-}
52
-# endif
53
#elif TARGET_INSN_START_WORDS == 3
54
-# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
55
static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
56
target_ulong a2)
57
{
58
- tcg_gen_op3(INDEX_op_insn_start, pc, a1, a2);
59
+ TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 3 * 64 / TCG_TARGET_REG_BITS);
60
+ tcg_set_insn_start_param(op, 0, pc);
61
+ tcg_set_insn_start_param(op, 1, a1);
62
+ tcg_set_insn_start_param(op, 2, a2);
63
}
64
-# else
65
-static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
66
- target_ulong a2)
67
-{
68
- tcg_gen_op6(INDEX_op_insn_start,
69
- (uint32_t)pc, (uint32_t)(pc >> 32),
70
- (uint32_t)a1, (uint32_t)(a1 >> 32),
71
- (uint32_t)a2, (uint32_t)(a2 >> 32));
72
-}
73
-# endif
74
#else
75
# error "Unhandled number of operands to insn_start"
76
#endif
77
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
78
index XXXXXXX..XXXXXXX 100644
79
--- a/include/tcg/tcg-opc.h
80
+++ b/include/tcg/tcg-opc.h
81
@@ -XXX,XX +XXX,XX @@ DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
82
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
83
84
/* QEMU specific */
85
-DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
86
+DEF(insn_start, 0, 0, DATA64_ARGS * TARGET_INSN_START_WORDS,
87
TCG_OPF_NOT_PRESENT)
88
DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
89
DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
90
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
91
index XXXXXXX..XXXXXXX 100644
92
--- a/include/tcg/tcg.h
93
+++ b/include/tcg/tcg.h
94
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
95
TCGTemp *reg_to_temp[TCG_TARGET_NB_REGS];
96
97
uint16_t gen_insn_end_off[TCG_MAX_INSNS];
98
- target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
99
+ uint64_t gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
100
101
/* Exit to translator on overflow. */
102
sigjmp_buf jmp_trans;
103
@@ -XXX,XX +XXX,XX @@ static inline void tcg_set_insn_param(TCGOp *op, int arg, TCGArg v)
104
op->args[arg] = v;
105
}
106
107
-static inline target_ulong tcg_get_insn_start_param(TCGOp *op, int arg)
108
+static inline uint64_t tcg_get_insn_start_param(TCGOp *op, int arg)
109
{
110
-#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
111
- return tcg_get_insn_param(op, arg);
112
-#else
113
- return tcg_get_insn_param(op, arg * 2) |
114
- ((uint64_t)tcg_get_insn_param(op, arg * 2 + 1) << 32);
115
-#endif
116
+ if (TCG_TARGET_REG_BITS == 64) {
117
+ return tcg_get_insn_param(op, arg);
118
+ } else {
119
+ return deposit64(tcg_get_insn_param(op, arg * 2), 32, 32,
120
+ tcg_get_insn_param(op, arg * 2 + 1));
121
+ }
122
}
123
124
-static inline void tcg_set_insn_start_param(TCGOp *op, int arg, target_ulong v)
125
+static inline void tcg_set_insn_start_param(TCGOp *op, int arg, uint64_t v)
126
{
127
-#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
128
- tcg_set_insn_param(op, arg, v);
129
-#else
130
- tcg_set_insn_param(op, arg * 2, v);
131
- tcg_set_insn_param(op, arg * 2 + 1, v >> 32);
132
-#endif
133
+ if (TCG_TARGET_REG_BITS == 64) {
134
+ tcg_set_insn_param(op, arg, v);
135
+ } else {
136
+ tcg_set_insn_param(op, arg * 2, v);
137
+ tcg_set_insn_param(op, arg * 2 + 1, v >> 32);
138
+ }
139
}
140
141
/* The last op that was emitted. */
142
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/accel/tcg/translate-all.c
145
+++ b/accel/tcg/translate-all.c
146
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
147
148
TBContext tb_ctx;
149
150
-/* Encode VAL as a signed leb128 sequence at P.
151
- Return P incremented past the encoded value. */
152
-static uint8_t *encode_sleb128(uint8_t *p, target_long val)
153
+/*
154
+ * Encode VAL as a signed leb128 sequence at P.
155
+ * Return P incremented past the encoded value.
156
+ */
157
+static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
158
{
159
int more, byte;
160
161
@@ -XXX,XX +XXX,XX @@ static uint8_t *encode_sleb128(uint8_t *p, target_long val)
162
return p;
163
}
164
165
-/* Decode a signed leb128 sequence at *PP; increment *PP past the
166
- decoded value. Return the decoded value. */
167
-static target_long decode_sleb128(const uint8_t **pp)
168
+/*
169
+ * Decode a signed leb128 sequence at *PP; increment *PP past the
170
+ * decoded value. Return the decoded value.
171
+ */
172
+static int64_t decode_sleb128(const uint8_t **pp)
173
{
174
const uint8_t *p = *pp;
175
- target_long val = 0;
176
+ int64_t val = 0;
177
int byte, shift = 0;
178
179
do {
180
byte = *p++;
181
- val |= (target_ulong)(byte & 0x7f) << shift;
182
+ val |= (int64_t)(byte & 0x7f) << shift;
183
shift += 7;
184
} while (byte & 0x80);
185
if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
186
- val |= -(target_ulong)1 << shift;
187
+ val |= -(int64_t)1 << shift;
188
}
189
190
*pp = p;
191
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
192
int i, j, n;
193
194
for (i = 0, n = tb->icount; i < n; ++i) {
195
- target_ulong prev;
196
+ uint64_t prev;
197
198
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
199
if (i == 0) {
200
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
201
/* Dump header and the first instruction */
202
fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
203
fprintf(logfile,
204
- " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
205
+ " -- guest addr 0x%016" PRIx64 " + tb prologue\n",
206
tcg_ctx->gen_insn_data[insn][0]);
207
chunk_start = tcg_ctx->gen_insn_end_off[insn];
208
disas(logfile, tb->tc.ptr, chunk_start);
209
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
210
while (insn < tb->icount) {
211
size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
212
if (chunk_end > chunk_start) {
213
- fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n",
214
+ fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
215
tcg_ctx->gen_insn_data[insn][0]);
216
disas(logfile, tb->tc.ptr + chunk_start,
217
chunk_end - chunk_start);
218
diff --git a/tcg/tcg.c b/tcg/tcg.c
219
index XXXXXXX..XXXXXXX 100644
220
--- a/tcg/tcg.c
221
+++ b/tcg/tcg.c
222
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
223
col += ne_fprintf(f, "\n ----");
224
225
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
226
- target_ulong a;
227
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
228
- a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
229
-#else
230
- a = op->args[i];
231
-#endif
232
- col += ne_fprintf(f, " " TARGET_FMT_lx, a);
233
+ col += ne_fprintf(f, " %016" PRIx64,
234
+ tcg_get_insn_start_param(op, i));
235
}
236
} else if (c == INDEX_op_call) {
237
const TCGHelperInfo *info = tcg_call_info(op);
238
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
239
}
240
num_insns++;
241
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
242
- target_ulong a;
243
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
244
- a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
245
-#else
246
- a = op->args[i];
247
-#endif
248
- s->gen_insn_data[num_insns][i] = a;
249
+ s->gen_insn_data[num_insns][i] =
250
+ tcg_get_insn_start_param(op, i);
251
}
252
break;
253
case INDEX_op_discard:
254
--
255
2.34.1
256
257
New patch
1
Always pass the target address as uint64_t.
2
Adjust tcg_out_{ld,st}_helper_args to match.
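For the 32-bit host, 32-bit guest case the new code fills the two 32-bit argument slots by hand: the low slot carries the guest address and the high slot is loaded with zero, so the helper sees a zero-extended uint64_t. A rough stand-alone sketch of just that packing, with illustrative names (the real code goes through tcg_out_helper_add_mov and tcg_out_helper_load_imm):

    #include <stdint.h>
    #include <inttypes.h>
    #include <stdio.h>

    /* Sketch only: pack a 32-bit guest address into the two 32-bit call
     * slots that together form the helper's uint64_t addr argument.  On a
     * big-endian host the high half occupies the first slot, so the
     * order flips. */
    static void pack_addr_slots(uint32_t slot[2], uint32_t guest_addr,
                                int host_big_endian)
    {
        slot[host_big_endian ? 1 : 0] = guest_addr; /* low half: the address */
        slot[host_big_endian ? 0 : 1] = 0;          /* high half: zero extension */
    }

    int main(void)
    {
        uint32_t slot[2];

        pack_addr_slots(slot, 0xfffff000u, 0);      /* little-endian host */
        printf("addr = 0x%08" PRIx32 "%08" PRIx32 "\n", slot[1], slot[0]);
        return 0;
    }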
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/tcg/tcg-ldst.h | 26 +++++++++---------
8
accel/tcg/cputlb.c | 26 +++++++++---------
9
accel/tcg/user-exec.c | 26 +++++++++---------
10
tcg/tcg.c | 62 ++++++++++++++++++++++++++++++++----------
11
4 files changed, 87 insertions(+), 53 deletions(-)
12
13
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-ldst.h
16
+++ b/include/tcg/tcg-ldst.h
17
@@ -XXX,XX +XXX,XX @@
18
#define TCG_LDST_H
19
20
/* Value zero-extended to tcg register size. */
21
-tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
22
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
23
MemOpIdx oi, uintptr_t retaddr);
24
-tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
25
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
26
MemOpIdx oi, uintptr_t retaddr);
27
-tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
28
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
29
MemOpIdx oi, uintptr_t retaddr);
30
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
31
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
32
MemOpIdx oi, uintptr_t retaddr);
33
-Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
34
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
35
MemOpIdx oi, uintptr_t retaddr);
36
37
/* Value sign-extended to tcg register size. */
38
-tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
39
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
40
MemOpIdx oi, uintptr_t retaddr);
41
-tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
42
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
43
MemOpIdx oi, uintptr_t retaddr);
44
-tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
45
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
46
MemOpIdx oi, uintptr_t retaddr);
47
48
/*
49
* Value extended to at least uint32_t, so that some ABIs do not require
50
* zero-extension from uint8_t or uint16_t.
51
*/
52
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
53
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
54
MemOpIdx oi, uintptr_t retaddr);
55
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
56
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
57
MemOpIdx oi, uintptr_t retaddr);
58
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
59
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
60
MemOpIdx oi, uintptr_t retaddr);
61
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
62
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
63
MemOpIdx oi, uintptr_t retaddr);
64
-void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
65
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
66
MemOpIdx oi, uintptr_t retaddr);
67
68
#endif /* TCG_LDST_H */
69
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/accel/tcg/cputlb.c
72
+++ b/accel/tcg/cputlb.c
73
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
74
return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
75
}
76
77
-tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
78
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
79
MemOpIdx oi, uintptr_t retaddr)
80
{
81
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
82
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
83
return ret;
84
}
85
86
-tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
87
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
88
MemOpIdx oi, uintptr_t retaddr)
89
{
90
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
91
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
92
return ret;
93
}
94
95
-tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
96
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
97
MemOpIdx oi, uintptr_t retaddr)
98
{
99
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
100
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
101
return ret;
102
}
103
104
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
105
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
106
MemOpIdx oi, uintptr_t retaddr)
107
{
108
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
109
@@ -XXX,XX +XXX,XX @@ uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
110
* avoid this for 64-bit data, or for 32-bit data on 32-bit host.
111
*/
112
113
-tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
114
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
115
MemOpIdx oi, uintptr_t retaddr)
116
{
117
return (int8_t)helper_ldub_mmu(env, addr, oi, retaddr);
118
}
119
120
-tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
121
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
122
MemOpIdx oi, uintptr_t retaddr)
123
{
124
return (int16_t)helper_lduw_mmu(env, addr, oi, retaddr);
125
}
126
127
-tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
128
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
129
MemOpIdx oi, uintptr_t retaddr)
130
{
131
return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
132
@@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr,
133
return ret;
134
}
135
136
-Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
137
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
138
uint32_t oi, uintptr_t retaddr)
139
{
140
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
141
@@ -XXX,XX +XXX,XX @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
142
}
143
}
144
145
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
146
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
147
MemOpIdx oi, uintptr_t ra)
148
{
149
MMULookupLocals l;
150
@@ -XXX,XX +XXX,XX @@ static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
151
do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
152
}
153
154
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
155
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
156
MemOpIdx oi, uintptr_t retaddr)
157
{
158
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
159
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
160
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
161
}
162
163
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
164
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
165
MemOpIdx oi, uintptr_t retaddr)
166
{
167
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
168
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
169
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
170
}
171
172
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
173
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
174
MemOpIdx oi, uintptr_t retaddr)
175
{
176
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
177
@@ -XXX,XX +XXX,XX @@ static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
178
}
179
}
180
181
-void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
182
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
183
MemOpIdx oi, uintptr_t retaddr)
184
{
185
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
186
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
187
index XXXXXXX..XXXXXXX 100644
188
--- a/accel/tcg/user-exec.c
189
+++ b/accel/tcg/user-exec.c
190
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
191
return ret;
192
}
193
194
-tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
195
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
196
MemOpIdx oi, uintptr_t ra)
197
{
198
return do_ld1_mmu(env, addr, get_memop(oi), ra);
199
}
200
201
-tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
202
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
203
MemOpIdx oi, uintptr_t ra)
204
{
205
return (int8_t)do_ld1_mmu(env, addr, get_memop(oi), ra);
206
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
207
return ret;
208
}
209
210
-tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
211
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
212
MemOpIdx oi, uintptr_t ra)
213
{
214
MemOp mop = get_memop(oi);
215
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
216
return ret;
217
}
218
219
-tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
220
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
221
MemOpIdx oi, uintptr_t ra)
222
{
223
MemOp mop = get_memop(oi);
224
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
225
return ret;
226
}
227
228
-tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
229
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
230
MemOpIdx oi, uintptr_t ra)
231
{
232
MemOp mop = get_memop(oi);
233
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
234
return ret;
235
}
236
237
-tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
238
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
239
MemOpIdx oi, uintptr_t ra)
240
{
241
MemOp mop = get_memop(oi);
242
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
243
return ret;
244
}
245
246
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
247
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
248
MemOpIdx oi, uintptr_t ra)
249
{
250
MemOp mop = get_memop(oi);
251
@@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
252
return ret;
253
}
254
255
-Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
256
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
257
MemOpIdx oi, uintptr_t ra)
258
{
259
MemOp mop = get_memop(oi);
260
@@ -XXX,XX +XXX,XX @@ static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
261
clear_helper_retaddr();
262
}
263
264
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
265
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
266
MemOpIdx oi, uintptr_t ra)
267
{
268
do_st1_mmu(env, addr, val, get_memop(oi), ra);
269
@@ -XXX,XX +XXX,XX @@ static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
270
clear_helper_retaddr();
271
}
272
273
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
274
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
275
MemOpIdx oi, uintptr_t ra)
276
{
277
MemOp mop = get_memop(oi);
278
@@ -XXX,XX +XXX,XX @@ static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
279
clear_helper_retaddr();
280
}
281
282
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
283
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
284
MemOpIdx oi, uintptr_t ra)
285
{
286
MemOp mop = get_memop(oi);
287
@@ -XXX,XX +XXX,XX @@ static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
288
clear_helper_retaddr();
289
}
290
291
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
292
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
293
MemOpIdx oi, uintptr_t ra)
294
{
295
MemOp mop = get_memop(oi);
296
@@ -XXX,XX +XXX,XX @@ static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
297
clear_helper_retaddr();
298
}
299
300
-void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
301
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
302
MemOpIdx oi, uintptr_t ra)
303
{
304
MemOp mop = get_memop(oi);
305
diff --git a/tcg/tcg.c b/tcg/tcg.c
306
index XXXXXXX..XXXXXXX 100644
307
--- a/tcg/tcg.c
308
+++ b/tcg/tcg.c
309
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld32_mmu = {
310
.flags = TCG_CALL_NO_WG,
311
.typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
312
| dh_typemask(env, 1)
313
- | dh_typemask(tl, 2) /* target_ulong addr */
314
+ | dh_typemask(i64, 2) /* uint64_t addr */
315
| dh_typemask(i32, 3) /* unsigned oi */
316
| dh_typemask(ptr, 4) /* uintptr_t ra */
317
};
318
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld64_mmu = {
319
.flags = TCG_CALL_NO_WG,
320
.typemask = dh_typemask(i64, 0) /* return uint64_t */
321
| dh_typemask(env, 1)
322
- | dh_typemask(tl, 2) /* target_ulong addr */
323
+ | dh_typemask(i64, 2) /* uint64_t addr */
324
| dh_typemask(i32, 3) /* unsigned oi */
325
| dh_typemask(ptr, 4) /* uintptr_t ra */
326
};
327
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld128_mmu = {
328
.flags = TCG_CALL_NO_WG,
329
.typemask = dh_typemask(i128, 0) /* return Int128 */
330
| dh_typemask(env, 1)
331
- | dh_typemask(tl, 2) /* target_ulong addr */
332
+ | dh_typemask(i64, 2) /* uint64_t addr */
333
| dh_typemask(i32, 3) /* unsigned oi */
334
| dh_typemask(ptr, 4) /* uintptr_t ra */
335
};
336
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st32_mmu = {
337
.flags = TCG_CALL_NO_WG,
338
.typemask = dh_typemask(void, 0)
339
| dh_typemask(env, 1)
340
- | dh_typemask(tl, 2) /* target_ulong addr */
341
+ | dh_typemask(i64, 2) /* uint64_t addr */
342
| dh_typemask(i32, 3) /* uint32_t data */
343
| dh_typemask(i32, 4) /* unsigned oi */
344
| dh_typemask(ptr, 5) /* uintptr_t ra */
345
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st64_mmu = {
346
.flags = TCG_CALL_NO_WG,
347
.typemask = dh_typemask(void, 0)
348
| dh_typemask(env, 1)
349
- | dh_typemask(tl, 2) /* target_ulong addr */
350
+ | dh_typemask(i64, 2) /* uint64_t addr */
351
| dh_typemask(i64, 3) /* uint64_t data */
352
| dh_typemask(i32, 4) /* unsigned oi */
353
| dh_typemask(ptr, 5) /* uintptr_t ra */
354
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st128_mmu = {
355
.flags = TCG_CALL_NO_WG,
356
.typemask = dh_typemask(void, 0)
357
| dh_typemask(env, 1)
358
- | dh_typemask(tl, 2) /* target_ulong addr */
359
+ | dh_typemask(i64, 2) /* uint64_t addr */
360
| dh_typemask(i128, 3) /* Int128 data */
361
| dh_typemask(i32, 4) /* unsigned oi */
362
| dh_typemask(ptr, 5) /* uintptr_t ra */
363
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
364
next_arg = 1;
365
366
loc = &info->in[next_arg];
367
- nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
368
- ldst->addrlo_reg, ldst->addrhi_reg);
369
- next_arg += nmov;
370
+ if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
371
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
372
+ ldst->addrlo_reg, ldst->addrhi_reg);
373
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
374
+ next_arg += nmov;
375
+ } else {
376
+ /*
377
+ * 32-bit host with 32-bit guest: zero-extend the guest address
378
+ * to 64-bits for the helper by storing the low part, then
379
+ * load a zero for the high part.
380
+ */
381
+ tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
382
+ TCG_TYPE_I32, TCG_TYPE_I32,
383
+ ldst->addrlo_reg, -1);
384
+ tcg_out_helper_load_slots(s, 1, mov, parm);
385
386
- tcg_out_helper_load_slots(s, nmov, mov, parm);
387
+ tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
388
+ TCG_TYPE_I32, 0, parm);
389
+ next_arg += 2;
390
+ }
391
392
switch (info->out_kind) {
393
case TCG_CALL_RET_NORMAL:
394
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
395
396
/* Handle addr argument. */
397
loc = &info->in[next_arg];
398
- n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
399
- ldst->addrlo_reg, ldst->addrhi_reg);
400
- next_arg += n;
401
- nmov += n;
402
+ if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
403
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
404
+ ldst->addrlo_reg, ldst->addrhi_reg);
405
+ next_arg += n;
406
+ nmov += n;
407
+ } else {
408
+ /*
409
+ * 32-bit host with 32-bit guest: zero-extend the guest address
410
+ * to 64-bits for the helper by storing the low part. Later,
411
+ * after we have processed the register inputs, we will load a
412
+ * zero for the high part.
413
+ */
414
+ tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
415
+ TCG_TYPE_I32, TCG_TYPE_I32,
416
+ ldst->addrlo_reg, -1);
417
+ next_arg += 2;
418
+ nmov += 1;
419
+ }
420
421
/* Handle data argument. */
422
loc = &info->in[next_arg];
423
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
424
g_assert_not_reached();
425
}
426
427
+ if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32) {
428
+ loc = &info->in[1 + !HOST_BIG_ENDIAN];
429
+ tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
430
+ }
431
+
432
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
433
}
434
435
--
436
2.34.1
437
438
New patch
1
Always pass the target address as uint64_t.
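In tcg-op-ldst.c the widening is done with tcg_gen_extu_i32_i64, i.e. a zero extension of the 32-bit guest address. A throwaway host-side illustration of why zero rather than sign extension is the right choice for addresses in the upper half of a 32-bit guest address space (not QEMU code):

    #include <stdint.h>
    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t guest_addr = 0xfffff000u;                      /* high guest page */
        uint64_t zext = (uint64_t)guest_addr;                   /* what extu does */
        uint64_t sext = (uint64_t)(int64_t)(int32_t)guest_addr; /* what it must not do */

        printf("zero-extended: 0x%016" PRIx64 "\n", zext);      /* 0x00000000fffff000 */
        printf("sign-extended: 0x%016" PRIx64 "\n", sext);      /* 0xfffffffffffff000 */
        return 0;
    }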
1
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
accel/tcg/tcg-runtime.h | 4 ++--
7
accel/tcg/cputlb.c | 5 ++---
8
accel/tcg/user-exec.c | 5 ++---
9
tcg/tcg-op-ldst.c | 26 ++++++++++++++++++++++++--
10
4 files changed, 30 insertions(+), 10 deletions(-)
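For a 32-bit guest this costs a single widening op before the call; a minimal
sketch of the pattern, mirroring maybe_extend_addr64 from the diff below
(illustration only, assuming TARGET_LONG_BITS == 32):

    /* Zero-extend a 32-bit guest address into the uint64_t helper argument. */
    TCGv_i64 a64 = tcg_temp_ebb_new_i64();
    tcg_gen_extu_i32_i64(a64, addr);          /* addr is a TCGv_i32 here */
    gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
    tcg_temp_free_i64(a64);

For a 64-bit guest the address is already an i64 and is passed through
unchanged.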
11
12
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/tcg-runtime.h
15
+++ b/accel/tcg/tcg-runtime.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
17
DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
18
#endif /* IN_HELPER_PROTO */
19
20
-DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, tl, i32)
21
-DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, tl, i128, i32)
22
+DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, i64, i32)
23
+DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32)
24
25
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
26
i32, env, tl, i32, i32, i32)
27
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/accel/tcg/cputlb.c
30
+++ b/accel/tcg/cputlb.c
31
@@ -XXX,XX +XXX,XX @@ Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
32
return do_ld16_mmu(env, addr, oi, retaddr);
33
}
34
35
-Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, uint32_t oi)
36
+Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, uint32_t oi)
37
{
38
return helper_ld16_mmu(env, addr, oi, GETPC());
39
}
40
@@ -XXX,XX +XXX,XX @@ void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
41
do_st16_mmu(env, addr, val, oi, retaddr);
42
}
43
44
-void helper_st_i128(CPUArchState *env, target_ulong addr, Int128 val,
45
- MemOpIdx oi)
46
+void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
47
{
48
helper_st16_mmu(env, addr, val, oi, GETPC());
49
}
50
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/accel/tcg/user-exec.c
53
+++ b/accel/tcg/user-exec.c
54
@@ -XXX,XX +XXX,XX @@ Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
55
return ret;
56
}
57
58
-Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, MemOpIdx oi)
59
+Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, MemOpIdx oi)
60
{
61
return helper_ld16_mmu(env, addr, oi, GETPC());
62
}
63
@@ -XXX,XX +XXX,XX @@ void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
64
do_st16_he_mmu(env, addr, val, mop, ra);
65
}
66
67
-void helper_st_i128(CPUArchState *env, target_ulong addr,
68
- Int128 val, MemOpIdx oi)
69
+void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
70
{
71
helper_st16_mmu(env, addr, val, oi, GETPC());
72
}
73
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/tcg/tcg-op-ldst.c
76
+++ b/tcg/tcg-op-ldst.c
77
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
78
#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
79
#endif
80
81
+static TCGv_i64 maybe_extend_addr64(TCGv addr)
82
+{
83
+#if TARGET_LONG_BITS == 32
84
+ TCGv_i64 a64 = tcg_temp_ebb_new_i64();
85
+ tcg_gen_extu_i32_i64(a64, addr);
86
+ return a64;
87
+#else
88
+ return addr;
89
+#endif
90
+}
91
+
92
+static void maybe_free_addr64(TCGv_i64 a64)
93
+{
94
+#if TARGET_LONG_BITS == 32
95
+ tcg_temp_free_i64(a64);
96
+#endif
97
+}
98
+
99
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
100
{
101
const MemOpIdx oi = make_memop_idx(memop, idx);
102
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
103
tcg_gen_bswap64_i64(y, y);
104
}
105
} else {
106
- gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
107
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
108
+ gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
109
+ maybe_free_addr64(a64);
110
}
111
112
plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
113
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
114
}
115
tcg_temp_free(addr_p8);
116
} else {
117
- gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
118
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
119
+ gen_helper_st_i128(cpu_env, a64, val, tcg_constant_i32(oi));
120
+ maybe_free_addr64(a64);
121
}
122
123
plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
124
--
125
2.34.1
126
127
New patch
1
1
Always pass the target address as uint64_t.
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
accel/tcg/tcg-runtime.h | 46 +++++++++++++++++------------------
7
tcg/tcg-op-ldst.c | 38 ++++++++++++++++++++---------
8
accel/tcg/atomic_common.c.inc | 14 +++++------
9
3 files changed, 57 insertions(+), 41 deletions(-)
10
11
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/tcg-runtime.h
14
+++ b/accel/tcg/tcg-runtime.h
15
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, i64, i32)
16
DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32)
17
18
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
19
- i32, env, tl, i32, i32, i32)
20
+ i32, env, i64, i32, i32, i32)
21
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
22
- i32, env, tl, i32, i32, i32)
23
+ i32, env, i64, i32, i32, i32)
24
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_le, TCG_CALL_NO_WG,
25
- i32, env, tl, i32, i32, i32)
26
+ i32, env, i64, i32, i32, i32)
27
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_be, TCG_CALL_NO_WG,
28
- i32, env, tl, i32, i32, i32)
29
+ i32, env, i64, i32, i32, i32)
30
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_le, TCG_CALL_NO_WG,
31
- i32, env, tl, i32, i32, i32)
32
+ i32, env, i64, i32, i32, i32)
33
#ifdef CONFIG_ATOMIC64
34
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
35
- i64, env, tl, i64, i64, i32)
36
+ i64, env, i64, i64, i64, i32)
37
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
38
- i64, env, tl, i64, i64, i32)
39
+ i64, env, i64, i64, i64, i32)
40
#endif
41
#ifdef CONFIG_CMPXCHG128
42
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG,
43
- i128, env, tl, i128, i128, i32)
44
+ i128, env, i64, i128, i128, i32)
45
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG,
46
- i128, env, tl, i128, i128, i32)
47
+ i128, env, i64, i128, i128, i32)
48
#endif
49
50
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_be, TCG_CALL_NO_WG,
51
- i128, env, tl, i128, i128, i32)
52
+ i128, env, i64, i128, i128, i32)
53
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_le, TCG_CALL_NO_WG,
54
- i128, env, tl, i128, i128, i32)
55
+ i128, env, i64, i128, i128, i32)
56
57
#ifdef CONFIG_ATOMIC64
58
#define GEN_ATOMIC_HELPERS(NAME) \
59
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
60
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
61
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
62
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
63
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
64
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
65
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
66
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
67
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
68
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
69
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
70
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
71
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
72
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
73
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
74
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_le), \
75
- TCG_CALL_NO_WG, i64, env, tl, i64, i32) \
76
+ TCG_CALL_NO_WG, i64, env, i64, i64, i32) \
77
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_be), \
78
- TCG_CALL_NO_WG, i64, env, tl, i64, i32)
79
+ TCG_CALL_NO_WG, i64, env, i64, i64, i32)
80
#else
81
#define GEN_ATOMIC_HELPERS(NAME) \
82
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
83
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
84
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
85
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
86
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
87
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
88
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
89
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
90
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
91
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
92
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
93
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
94
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
95
- TCG_CALL_NO_WG, i32, env, tl, i32, i32)
96
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32)
97
#endif /* CONFIG_ATOMIC64 */
98
99
GEN_ATOMIC_HELPERS(fetch_add)
100
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/tcg/tcg-op-ldst.c
103
+++ b/tcg/tcg-op-ldst.c
104
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
105
}
106
}
107
108
-typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
109
+typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
110
TCGv_i32, TCGv_i32, TCGv_i32);
111
-typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
112
+typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
113
TCGv_i64, TCGv_i64, TCGv_i32);
114
-typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
115
+typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
116
TCGv_i128, TCGv_i128, TCGv_i32);
117
-typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
118
+typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
119
TCGv_i32, TCGv_i32);
120
-typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
121
+typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
122
TCGv_i64, TCGv_i32);
123
124
#ifdef CONFIG_ATOMIC64
125
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
126
TCGv_i32 newv, TCGArg idx, MemOp memop)
127
{
128
gen_atomic_cx_i32 gen;
129
+ TCGv_i64 a64;
130
MemOpIdx oi;
131
132
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
133
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
134
tcg_debug_assert(gen != NULL);
135
136
oi = make_memop_idx(memop & ~MO_SIGN, idx);
137
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
138
+ a64 = maybe_extend_addr64(addr);
139
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
140
+ maybe_free_addr64(a64);
141
142
if (memop & MO_SIGN) {
143
tcg_gen_ext_i32(retv, retv, memop);
144
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
145
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
146
if (gen) {
147
MemOpIdx oi = make_memop_idx(memop, idx);
148
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
149
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
150
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
151
+ maybe_free_addr64(a64);
152
return;
153
}
154
155
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
156
? gen_helper_nonatomic_cmpxchgo_le
157
: gen_helper_nonatomic_cmpxchgo_be);
158
MemOpIdx oi = make_memop_idx(memop, idx);
159
+ TCGv_i64 a64;
160
161
tcg_debug_assert((memop & MO_SIZE) == MO_128);
162
tcg_debug_assert((memop & MO_SIGN) == 0);
163
164
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
165
+ a64 = maybe_extend_addr64(addr);
166
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
167
+ maybe_free_addr64(a64);
168
} else {
169
TCGv_i128 oldv = tcg_temp_ebb_new_i128();
170
TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
171
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
172
173
if (gen) {
174
MemOpIdx oi = make_memop_idx(memop, idx);
175
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
176
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
177
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
178
+ maybe_free_addr64(a64);
179
return;
180
}
181
182
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
183
TCGArg idx, MemOp memop, void * const table[])
184
{
185
gen_atomic_op_i32 gen;
186
+ TCGv_i64 a64;
187
MemOpIdx oi;
188
189
memop = tcg_canonicalize_memop(memop, 0, 0);
190
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
191
tcg_debug_assert(gen != NULL);
192
193
oi = make_memop_idx(memop & ~MO_SIGN, idx);
194
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
195
+ a64 = maybe_extend_addr64(addr);
196
+ gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
197
+ maybe_free_addr64(a64);
198
199
if (memop & MO_SIGN) {
200
tcg_gen_ext_i32(ret, ret, memop);
201
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
202
if ((memop & MO_SIZE) == MO_64) {
203
#ifdef CONFIG_ATOMIC64
204
gen_atomic_op_i64 gen;
205
+ TCGv_i64 a64;
206
MemOpIdx oi;
207
208
gen = table[memop & (MO_SIZE | MO_BSWAP)];
209
tcg_debug_assert(gen != NULL);
210
211
oi = make_memop_idx(memop & ~MO_SIGN, idx);
212
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
213
+ a64 = maybe_extend_addr64(addr);
214
+ gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
215
+ maybe_free_addr64(a64);
216
#else
217
gen_helper_exit_atomic(cpu_env);
218
/* Produce a result, so that we have a well-formed opcode stream
219
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
220
index XXXXXXX..XXXXXXX 100644
221
--- a/accel/tcg/atomic_common.c.inc
222
+++ b/accel/tcg/atomic_common.c.inc
223
@@ -XXX,XX +XXX,XX @@
224
* See the COPYING file in the top-level directory.
225
*/
226
227
-static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr,
228
+static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
229
MemOpIdx oi)
230
{
231
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW);
232
}
233
234
#if HAVE_ATOMIC128
235
-static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr,
236
+static void atomic_trace_ld_post(CPUArchState *env, uint64_t addr,
237
MemOpIdx oi)
238
{
239
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
240
}
241
242
-static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
243
+static void atomic_trace_st_post(CPUArchState *env, uint64_t addr,
244
MemOpIdx oi)
245
{
246
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
247
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
248
*/
249
250
#define CMPXCHG_HELPER(OP, TYPE) \
251
- TYPE HELPER(atomic_##OP)(CPUArchState *env, target_ulong addr, \
252
+ TYPE HELPER(atomic_##OP)(CPUArchState *env, uint64_t addr, \
253
TYPE oldv, TYPE newv, uint32_t oi) \
254
{ return cpu_atomic_##OP##_mmu(env, addr, oldv, newv, oi, GETPC()); }
255
256
@@ -XXX,XX +XXX,XX @@ CMPXCHG_HELPER(cmpxchgo_le, Int128)
257
258
#undef CMPXCHG_HELPER
259
260
-Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
261
+Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, uint64_t addr,
262
Int128 cmpv, Int128 newv, uint32_t oi)
263
{
264
#if TCG_TARGET_REG_BITS == 32
265
@@ -XXX,XX +XXX,XX @@ Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
266
#endif
267
}
268
269
-Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
270
+Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, uint64_t addr,
271
Int128 cmpv, Int128 newv, uint32_t oi)
272
{
273
#if TCG_TARGET_REG_BITS == 32
274
@@ -XXX,XX +XXX,XX @@ Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
275
}
276
277
#define ATOMIC_HELPER(OP, TYPE) \
278
- TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, target_ulong addr, \
279
+ TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, uint64_t addr, \
280
TYPE val, uint32_t oi) \
281
{ return glue(glue(cpu_atomic_,OP),_mmu)(env, addr, val, oi, GETPC()); }
282
283
--
284
2.34.1
285
286
New patch
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
include/tcg/tcg.h | 2 +-
5
tcg/tcg.c | 2 +-
6
2 files changed, 2 insertions(+), 2 deletions(-)
1
7
8
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
9
index XXXXXXX..XXXXXXX 100644
10
--- a/include/tcg/tcg.h
11
+++ b/include/tcg/tcg.h
12
@@ -XXX,XX +XXX,XX @@ void tcg_register_thread(void);
13
void tcg_prologue_init(TCGContext *s);
14
void tcg_func_start(TCGContext *s);
15
16
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start);
17
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start);
18
19
void tb_target_set_jmp_target(const TranslationBlock *, int,
20
uintptr_t, uintptr_t);
21
diff --git a/tcg/tcg.c b/tcg/tcg.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/tcg/tcg.c
24
+++ b/tcg/tcg.c
25
@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void)
26
#endif
27
28
29
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
30
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
31
{
32
#ifdef CONFIG_PROFILER
33
TCGProfile *prof = &s->prof;
34
--
35
2.34.1
36
37
New patch
1
As gen_mem_wrapped is only used in plugin_gen_empty_mem_callback,
2
we can avoid the curiosity of union mem_gen_fn by inlining it.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
accel/tcg/plugin-gen.c | 30 ++++++------------------------
8
1 file changed, 6 insertions(+), 24 deletions(-)
9
10
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/accel/tcg/plugin-gen.c
13
+++ b/accel/tcg/plugin-gen.c
14
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_empty_callback(enum plugin_gen_from from)
15
}
16
}
17
18
-union mem_gen_fn {
19
- void (*mem_fn)(TCGv, uint32_t);
20
- void (*inline_fn)(void);
21
-};
22
-
23
-static void gen_mem_wrapped(enum plugin_gen_cb type,
24
- const union mem_gen_fn *f, TCGv addr,
25
- uint32_t info, bool is_mem)
26
+void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
27
{
28
enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
29
30
- gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, rw);
31
- if (is_mem) {
32
- f->mem_fn(addr, info);
33
- } else {
34
- f->inline_fn();
35
- }
36
+ gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, PLUGIN_GEN_CB_MEM, rw);
37
+ gen_empty_mem_cb(addr, info);
38
tcg_gen_plugin_cb_end();
39
-}
40
41
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
42
-{
43
- union mem_gen_fn fn;
44
-
45
- fn.mem_fn = gen_empty_mem_cb;
46
- gen_mem_wrapped(PLUGIN_GEN_CB_MEM, &fn, addr, info, true);
47
-
48
- fn.inline_fn = gen_empty_inline_cb;
49
- gen_mem_wrapped(PLUGIN_GEN_CB_INLINE, &fn, 0, info, false);
50
+ gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, PLUGIN_GEN_CB_INLINE, rw);
51
+ gen_empty_inline_cb();
52
+ tcg_gen_plugin_cb_end();
53
}
54
55
static TCGOp *find_op(TCGOp *op, TCGOpcode opc)
56
--
57
2.34.1
58
59
New patch
1
As do_gen_mem_cb is called once, merge it into gen_empty_mem_cb.
1
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
accel/tcg/plugin-gen.c | 39 +++++++++++++++++----------------------
7
1 file changed, 17 insertions(+), 22 deletions(-)
8
9
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/accel/tcg/plugin-gen.c
12
+++ b/accel/tcg/plugin-gen.c
13
@@ -XXX,XX +XXX,XX @@ void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index,
14
void *userdata)
15
{ }
16
17
-static void do_gen_mem_cb(TCGv vaddr, uint32_t info)
18
-{
19
- TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
20
- TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
21
- TCGv_i64 vaddr64 = tcg_temp_ebb_new_i64();
22
- TCGv_ptr udata = tcg_temp_ebb_new_ptr();
23
-
24
- tcg_gen_movi_i32(meminfo, info);
25
- tcg_gen_movi_ptr(udata, 0);
26
- tcg_gen_ld_i32(cpu_index, cpu_env,
27
- -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
28
- tcg_gen_extu_tl_i64(vaddr64, vaddr);
29
-
30
- gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, vaddr64, udata);
31
-
32
- tcg_temp_free_ptr(udata);
33
- tcg_temp_free_i64(vaddr64);
34
- tcg_temp_free_i32(meminfo);
35
- tcg_temp_free_i32(cpu_index);
36
-}
37
-
38
static void gen_empty_udata_cb(void)
39
{
40
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
41
@@ -XXX,XX +XXX,XX @@ static void gen_empty_inline_cb(void)
42
43
static void gen_empty_mem_cb(TCGv addr, uint32_t info)
44
{
45
- do_gen_mem_cb(addr, info);
46
+ TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
47
+ TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
48
+ TCGv_i64 addr64 = tcg_temp_ebb_new_i64();
49
+ TCGv_ptr udata = tcg_temp_ebb_new_ptr();
50
+
51
+ tcg_gen_movi_i32(meminfo, info);
52
+ tcg_gen_movi_ptr(udata, 0);
53
+ tcg_gen_ld_i32(cpu_index, cpu_env,
54
+ -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
55
+ tcg_gen_extu_tl_i64(addr64, addr);
56
+
57
+ gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr64, udata);
58
+
59
+ tcg_temp_free_ptr(udata);
60
+ tcg_temp_free_i64(addr64);
61
+ tcg_temp_free_i32(meminfo);
62
+ tcg_temp_free_i32(cpu_index);
63
}
64
65
/*
66
--
67
2.34.1
68
69
New patch
1
We only need to make a copy for loads, where the destination may
overlap the address. For now, eliminate the copy only for stores and
for 128-bit loads, whose TCGv_i128 destination can never be the same
temporary as the address.
1
4
5
Rename plugin_prep_mem_callbacks to plugin_maybe_preserve_addr,
6
returning NULL if no copy is made.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
tcg/tcg-op-ldst.c | 38 ++++++++++++++++++++------------------
12
1 file changed, 20 insertions(+), 18 deletions(-)
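The overlap in question is the common translator pattern where the load
destination reuses the address temporary; a hypothetical fragment for
illustration ('cpu_gpr', 'rd' and 'mmu_idx' are assumed names, not from
this patch):

    TCGv reg = cpu_gpr[rd];
    tcg_gen_qemu_ld_tl(reg, reg, mmu_idx, MO_TEUL);
    /*
     * 'reg' now holds the loaded data and the address it held is gone,
     * so the plugin memory callback has to run on a copy saved before
     * the load; stores never clobber the address, hence no copy there.
     */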
13
14
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/tcg-op-ldst.c
17
+++ b/tcg/tcg-op-ldst.c
18
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)
19
}
20
}
21
22
-static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
23
+/* Only required for loads, where value might overlap addr. */
24
+static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
25
{
26
#ifdef CONFIG_PLUGIN
27
if (tcg_ctx->plugin_insn != NULL) {
28
@@ -XXX,XX +XXX,XX @@ static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
29
return temp;
30
}
31
#endif
32
- return vaddr;
33
+ return NULL;
34
}
35
36
-static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
37
- enum qemu_plugin_mem_rw rw)
38
+static void
39
+plugin_gen_mem_callbacks(TCGv copy_addr, TCGv orig_addr, MemOpIdx oi,
40
+ enum qemu_plugin_mem_rw rw)
41
{
42
#ifdef CONFIG_PLUGIN
43
if (tcg_ctx->plugin_insn != NULL) {
44
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
45
- plugin_gen_empty_mem_callback(vaddr, info);
46
- tcg_temp_free(vaddr);
47
+ plugin_gen_empty_mem_callback(copy_addr ? : orig_addr, info);
48
+ if (copy_addr) {
49
+ tcg_temp_free(copy_addr);
50
+ }
51
}
52
#endif
53
}
54
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
55
{
56
MemOp orig_memop;
57
MemOpIdx oi;
58
+ TCGv copy_addr;
59
60
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
61
memop = tcg_canonicalize_memop(memop, 0, 0);
62
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
63
}
64
}
65
66
- addr = plugin_prep_mem_callbacks(addr);
67
+ copy_addr = plugin_maybe_preserve_addr(addr);
68
gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
69
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
70
+ plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
71
72
if ((orig_memop ^ memop) & MO_BSWAP) {
73
switch (orig_memop & MO_SIZE) {
74
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
75
memop &= ~MO_BSWAP;
76
}
77
78
- addr = plugin_prep_mem_callbacks(addr);
79
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
80
gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
81
} else {
82
gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
83
}
84
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
85
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
86
87
if (swap) {
88
tcg_temp_free_i32(swap);
89
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
90
{
91
MemOp orig_memop;
92
MemOpIdx oi;
93
+ TCGv copy_addr;
94
95
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
96
tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
97
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
98
}
99
}
100
101
- addr = plugin_prep_mem_callbacks(addr);
102
+ copy_addr = plugin_maybe_preserve_addr(addr);
103
gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
104
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
105
+ plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
106
107
if ((orig_memop ^ memop) & MO_BSWAP) {
108
int flags = (orig_memop & MO_SIGN
109
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
110
memop &= ~MO_BSWAP;
111
}
112
113
- addr = plugin_prep_mem_callbacks(addr);
114
gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
115
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
116
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
117
118
if (swap) {
119
tcg_temp_free_i64(swap);
120
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
121
tcg_debug_assert((memop & MO_SIGN) == 0);
122
123
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
124
- addr = plugin_prep_mem_callbacks(addr);
125
126
/* TODO: For now, force 32-bit hosts to use the helper. */
127
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
128
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
129
maybe_free_addr64(a64);
130
}
131
132
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
133
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_R);
134
}
135
136
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
137
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
138
tcg_debug_assert((memop & MO_SIGN) == 0);
139
140
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
141
- addr = plugin_prep_mem_callbacks(addr);
142
143
/* TODO: For now, force 32-bit hosts to use the helper. */
144
145
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
146
maybe_free_addr64(a64);
147
}
148
149
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
150
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
151
}
152
153
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
154
--
155
2.34.1
156
157
New patch
1
Since gen_empty_mem_cb already zero-extends the address to 64 bits,
do that extension earlier, during tcg expansion of the memory operation.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/exec/plugin-gen.h | 4 ++--
8
accel/tcg/plugin-gen.c | 9 +++------
9
tcg/tcg-op-ldst.c | 28 ++++++++++++++++++++--------
10
3 files changed, 25 insertions(+), 16 deletions(-)
11
12
diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/exec/plugin-gen.h
15
+++ b/include/exec/plugin-gen.h
16
@@ -XXX,XX +XXX,XX @@ void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
17
void plugin_gen_insn_end(void);
18
19
void plugin_gen_disable_mem_helpers(void);
20
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info);
21
+void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info);
22
23
static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
24
{
25
@@ -XXX,XX +XXX,XX @@ static inline void plugin_gen_tb_end(CPUState *cpu)
26
static inline void plugin_gen_disable_mem_helpers(void)
27
{ }
28
29
-static inline void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
30
+static inline void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info)
31
{ }
32
33
static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
34
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/accel/tcg/plugin-gen.c
37
+++ b/accel/tcg/plugin-gen.c
38
@@ -XXX,XX +XXX,XX @@ static void gen_empty_inline_cb(void)
39
tcg_temp_free_i64(val);
40
}
41
42
-static void gen_empty_mem_cb(TCGv addr, uint32_t info)
43
+static void gen_empty_mem_cb(TCGv_i64 addr, uint32_t info)
44
{
45
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
46
TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
47
- TCGv_i64 addr64 = tcg_temp_ebb_new_i64();
48
TCGv_ptr udata = tcg_temp_ebb_new_ptr();
49
50
tcg_gen_movi_i32(meminfo, info);
51
tcg_gen_movi_ptr(udata, 0);
52
tcg_gen_ld_i32(cpu_index, cpu_env,
53
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
54
- tcg_gen_extu_tl_i64(addr64, addr);
55
56
- gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr64, udata);
57
+ gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr, udata);
58
59
tcg_temp_free_ptr(udata);
60
- tcg_temp_free_i64(addr64);
61
tcg_temp_free_i32(meminfo);
62
tcg_temp_free_i32(cpu_index);
63
}
64
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_empty_callback(enum plugin_gen_from from)
65
}
66
}
67
68
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
69
+void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info)
70
{
71
enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
72
73
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/tcg/tcg-op-ldst.c
76
+++ b/tcg/tcg-op-ldst.c
77
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)
78
}
79
80
/* Only required for loads, where value might overlap addr. */
81
-static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
82
+static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
83
{
84
#ifdef CONFIG_PLUGIN
85
if (tcg_ctx->plugin_insn != NULL) {
86
/* Save a copy of the vaddr for use after a load. */
87
- TCGv temp = tcg_temp_new();
88
- tcg_gen_mov_tl(temp, vaddr);
89
+ TCGv_i64 temp = tcg_temp_ebb_new_i64();
90
+ tcg_gen_extu_tl_i64(temp, vaddr);
91
return temp;
92
}
93
#endif
94
@@ -XXX,XX +XXX,XX @@ static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
95
}
96
97
static void
98
-plugin_gen_mem_callbacks(TCGv copy_addr, TCGv orig_addr, MemOpIdx oi,
99
+plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGv orig_addr, MemOpIdx oi,
100
enum qemu_plugin_mem_rw rw)
101
{
102
#ifdef CONFIG_PLUGIN
103
if (tcg_ctx->plugin_insn != NULL) {
104
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
105
- plugin_gen_empty_mem_callback(copy_addr ? : orig_addr, info);
106
+
107
+#if TARGET_LONG_BITS == 64
108
if (copy_addr) {
109
- tcg_temp_free(copy_addr);
110
+ plugin_gen_empty_mem_callback(copy_addr, info);
111
+ tcg_temp_free_i64(copy_addr);
112
+ } else {
113
+ plugin_gen_empty_mem_callback(orig_addr, info);
114
}
115
+#else
116
+ if (!copy_addr) {
117
+ copy_addr = tcg_temp_ebb_new_i64();
118
+ tcg_gen_extu_tl_i64(copy_addr, orig_addr);
119
+ }
120
+ plugin_gen_empty_mem_callback(copy_addr, info);
121
+ tcg_temp_free_i64(copy_addr);
122
+#endif
123
}
124
#endif
125
}
126
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
127
{
128
MemOp orig_memop;
129
MemOpIdx oi;
130
- TCGv copy_addr;
131
+ TCGv_i64 copy_addr;
132
133
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
134
memop = tcg_canonicalize_memop(memop, 0, 0);
135
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
136
{
137
MemOp orig_memop;
138
MemOpIdx oi;
139
- TCGv copy_addr;
140
+ TCGv_i64 copy_addr;
141
142
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
143
tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
144
--
145
2.34.1
146
147
New patch
1
This will enable replacement of TARGET_LONG_BITS within tcg/.
1
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
include/tcg/tcg.h | 1 +
7
accel/tcg/translate-all.c | 2 ++
8
tcg/tcg.c | 3 +++
9
3 files changed, 6 insertions(+)
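Later patches in the series use the new field to replace compile-time
TARGET_LONG_BITS tests with run-time checks, roughly as follows (sketch
only, following the shape of the follow-up patches; 'addr' is a TCGTemp *
and 'a64' a TCGv_i64):

    if (tcg_ctx->addr_type == TCG_TYPE_I32) {
        /* 32-bit guest address: widen it before use */
        tcg_gen_extu_i32_i64(a64, temp_tcgv_i32(addr));
    } else {
        /* 64-bit guest address: use it as-is */
        tcg_gen_mov_i64(a64, temp_tcgv_i64(addr));
    }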
10
11
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/include/tcg/tcg.h
14
+++ b/include/tcg/tcg.h
15
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
16
int nb_temps;
17
int nb_indirects;
18
int nb_ops;
19
+ TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */
20
21
TCGRegSet reserved_regs;
22
intptr_t current_frame_offset;
23
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/accel/tcg/translate-all.c
26
+++ b/accel/tcg/translate-all.c
27
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
28
tb_set_page_addr0(tb, phys_pc);
29
tb_set_page_addr1(tb, -1);
30
tcg_ctx->gen_tb = tb;
31
+ tcg_ctx->addr_type = TCG_TYPE_TL;
32
+
33
tb_overflow:
34
35
#ifdef CONFIG_PROFILER
36
diff --git a/tcg/tcg.c b/tcg/tcg.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/tcg/tcg.c
39
+++ b/tcg/tcg.c
40
@@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s)
41
QTAILQ_INIT(&s->ops);
42
QTAILQ_INIT(&s->free_ops);
43
QSIMPLEQ_INIT(&s->labels);
44
+
45
+ tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
46
+ s->addr_type == TCG_TYPE_I64);
47
}
48
49
static TCGTemp *tcg_temp_alloc(TCGContext *s)
50
--
51
2.34.1
52
53
New patch
1
Expand from TCGv to TCGTemp inline in the translators,
2
and validate that the size matches tcg_ctx->addr_type.
3
These inlines will eventually be seen only by target-specific code.
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
include/tcg/tcg-op.h | 50 ++++++-
9
tcg/tcg-op-ldst.c | 343 ++++++++++++++++++++++++++-----------------
10
2 files changed, 251 insertions(+), 142 deletions(-)
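Target translators are unaffected: the old entry points remain, but become
inline wrappers that forward the guest address type; for example (mirroring
the new inlines from the diff below, 'mmu_idx' is an assumed name):

    /* A target still writes: */
    tcg_gen_qemu_ld_i64(val, addr, mmu_idx, MO_TEUQ);
    /* ...which now expands to the checked form: */
    tcg_gen_qemu_ld_i64_chk(val, tcgv_tl_temp(addr), mmu_idx,
                            MO_TEUQ, TCG_TYPE_TL);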
11
12
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/tcg/tcg-op.h
15
+++ b/include/tcg/tcg-op.h
16
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_plugin_cb_end(void)
17
#define tcg_temp_new() tcg_temp_new_i32()
18
#define tcg_global_mem_new tcg_global_mem_new_i32
19
#define tcg_temp_free tcg_temp_free_i32
20
+#define tcgv_tl_temp tcgv_i32_temp
21
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
22
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
23
#else
24
#define tcg_temp_new() tcg_temp_new_i64()
25
#define tcg_global_mem_new tcg_global_mem_new_i64
26
#define tcg_temp_free tcg_temp_free_i64
27
+#define tcgv_tl_temp tcgv_i64_temp
28
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
29
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
30
#endif
31
32
-void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
33
-void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
34
-void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
35
-void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
36
-void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
37
-void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
38
+void tcg_gen_qemu_ld_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
39
+void tcg_gen_qemu_st_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
40
+void tcg_gen_qemu_ld_i64_chk(TCGv_i64, TCGTemp *, TCGArg, MemOp, TCGType);
41
+void tcg_gen_qemu_st_i64_chk(TCGv_i64, TCGTemp *, TCGArg, MemOp, TCGType);
42
+void tcg_gen_qemu_ld_i128_chk(TCGv_i128, TCGTemp *, TCGArg, MemOp, TCGType);
43
+void tcg_gen_qemu_st_i128_chk(TCGv_i128, TCGTemp *, TCGArg, MemOp, TCGType);
44
+
45
+static inline void
46
+tcg_gen_qemu_ld_i32(TCGv_i32 v, TCGv a, TCGArg i, MemOp m)
47
+{
48
+ tcg_gen_qemu_ld_i32_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
49
+}
50
+
51
+static inline void
52
+tcg_gen_qemu_st_i32(TCGv_i32 v, TCGv a, TCGArg i, MemOp m)
53
+{
54
+ tcg_gen_qemu_st_i32_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
55
+}
56
+
57
+static inline void
58
+tcg_gen_qemu_ld_i64(TCGv_i64 v, TCGv a, TCGArg i, MemOp m)
59
+{
60
+ tcg_gen_qemu_ld_i64_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
61
+}
62
+
63
+static inline void
64
+tcg_gen_qemu_st_i64(TCGv_i64 v, TCGv a, TCGArg i, MemOp m)
65
+{
66
+ tcg_gen_qemu_st_i64_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
67
+}
68
+
69
+static inline void
70
+tcg_gen_qemu_ld_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
71
+{
72
+ tcg_gen_qemu_ld_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
73
+}
74
+
75
+static inline void
76
+tcg_gen_qemu_st_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
77
+{
78
+ tcg_gen_qemu_st_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
79
+}
80
81
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
82
TCGArg, MemOp);
83
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/tcg/tcg-op-ldst.c
86
+++ b/tcg/tcg-op-ldst.c
87
@@ -XXX,XX +XXX,XX @@ static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
88
return op;
89
}
90
91
-static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
92
- MemOp memop, TCGArg idx)
93
+static void gen_ldst(TCGOpcode opc, TCGTemp *vl, TCGTemp *vh,
94
+ TCGTemp *addr, MemOpIdx oi)
95
{
96
- MemOpIdx oi = make_memop_idx(memop, idx);
97
-#if TARGET_LONG_BITS == 32
98
- tcg_gen_op3i_i32(opc, val, addr, oi);
99
-#else
100
- if (TCG_TARGET_REG_BITS == 32) {
101
- tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
102
+ if (TCG_TARGET_REG_BITS == 64 || tcg_ctx->addr_type == TCG_TYPE_I32) {
103
+ if (vh) {
104
+ tcg_gen_op4(opc, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi);
105
+ } else {
106
+ tcg_gen_op3(opc, temp_arg(vl), temp_arg(addr), oi);
107
+ }
108
} else {
109
- tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
110
+ /* See TCGV_LOW/HIGH. */
111
+ TCGTemp *al = addr + HOST_BIG_ENDIAN;
112
+ TCGTemp *ah = addr + !HOST_BIG_ENDIAN;
113
+
114
+ if (vh) {
115
+ tcg_gen_op5(opc, temp_arg(vl), temp_arg(vh),
116
+ temp_arg(al), temp_arg(ah), oi);
117
+ } else {
118
+ tcg_gen_op4(opc, temp_arg(vl), temp_arg(al), temp_arg(ah), oi);
119
+ }
120
}
121
-#endif
122
}
123
124
-static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
125
- MemOp memop, TCGArg idx)
126
+static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi)
127
{
128
- MemOpIdx oi = make_memop_idx(memop, idx);
129
-#if TARGET_LONG_BITS == 32
130
if (TCG_TARGET_REG_BITS == 32) {
131
- tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
132
+ TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v));
133
+ TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v));
134
+ gen_ldst(opc, vl, vh, addr, oi);
135
} else {
136
- tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
137
+ gen_ldst(opc, tcgv_i64_temp(v), NULL, addr, oi);
138
}
139
-#else
140
- if (TCG_TARGET_REG_BITS == 32) {
141
- tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
142
- TCGV_LOW(addr), TCGV_HIGH(addr), oi);
143
- } else {
144
- tcg_gen_op3i_i64(opc, val, addr, oi);
145
- }
146
-#endif
147
}
148
149
static void tcg_gen_req_mo(TCGBar type)
150
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)
151
}
152
153
/* Only required for loads, where value might overlap addr. */
154
-static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
155
+static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
156
{
157
#ifdef CONFIG_PLUGIN
158
if (tcg_ctx->plugin_insn != NULL) {
159
/* Save a copy of the vaddr for use after a load. */
160
TCGv_i64 temp = tcg_temp_ebb_new_i64();
161
- tcg_gen_extu_tl_i64(temp, vaddr);
162
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
163
+ tcg_gen_extu_i32_i64(temp, temp_tcgv_i32(addr));
164
+ } else {
165
+ tcg_gen_mov_i64(temp, temp_tcgv_i64(addr));
166
+ }
167
return temp;
168
}
169
#endif
170
@@ -XXX,XX +XXX,XX @@ static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
171
}
172
173
static void
174
-plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGv orig_addr, MemOpIdx oi,
175
+plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
176
enum qemu_plugin_mem_rw rw)
177
{
178
#ifdef CONFIG_PLUGIN
179
if (tcg_ctx->plugin_insn != NULL) {
180
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
181
182
-#if TARGET_LONG_BITS == 64
183
- if (copy_addr) {
184
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
185
+ if (!copy_addr) {
186
+ copy_addr = tcg_temp_ebb_new_i64();
187
+ tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr));
188
+ }
189
plugin_gen_empty_mem_callback(copy_addr, info);
190
tcg_temp_free_i64(copy_addr);
191
} else {
192
- plugin_gen_empty_mem_callback(orig_addr, info);
193
+ if (copy_addr) {
194
+ plugin_gen_empty_mem_callback(copy_addr, info);
195
+ tcg_temp_free_i64(copy_addr);
196
+ } else {
197
+ plugin_gen_empty_mem_callback(temp_tcgv_i64(orig_addr), info);
198
+ }
199
}
200
-#else
201
- if (!copy_addr) {
202
- copy_addr = tcg_temp_ebb_new_i64();
203
- tcg_gen_extu_tl_i64(copy_addr, orig_addr);
204
- }
205
- plugin_gen_empty_mem_callback(copy_addr, info);
206
- tcg_temp_free_i64(copy_addr);
207
-#endif
208
}
209
#endif
210
}
211
212
-void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
213
+static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
214
+ TCGArg idx, MemOp memop)
215
{
216
MemOp orig_memop;
217
- MemOpIdx oi;
218
+ MemOpIdx orig_oi, oi;
219
TCGv_i64 copy_addr;
220
221
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
222
- memop = tcg_canonicalize_memop(memop, 0, 0);
223
- oi = make_memop_idx(memop, idx);
224
+ orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
225
+ orig_oi = oi = make_memop_idx(memop, idx);
226
227
- orig_memop = memop;
228
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
229
memop &= ~MO_BSWAP;
230
/* The bswap primitive benefits from zero-extended input. */
231
if ((memop & MO_SSIZE) == MO_SW) {
232
memop &= ~MO_SIGN;
233
}
234
+ oi = make_memop_idx(memop, idx);
235
}
236
237
copy_addr = plugin_maybe_preserve_addr(addr);
238
- gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
239
- plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
240
+ gen_ldst(INDEX_op_qemu_ld_i32, tcgv_i32_temp(val), NULL, addr, oi);
241
+ plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
242
243
if ((orig_memop ^ memop) & MO_BSWAP) {
244
switch (orig_memop & MO_SIZE) {
245
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
246
}
247
}
248
249
-void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
250
+void tcg_gen_qemu_ld_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
251
+ MemOp memop, TCGType addr_type)
252
+{
253
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
254
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
255
+ tcg_gen_qemu_ld_i32_int(val, addr, idx, memop);
256
+}
257
+
258
+static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
259
+ TCGArg idx, MemOp memop)
260
{
261
TCGv_i32 swap = NULL;
262
- MemOpIdx oi;
263
+ MemOpIdx orig_oi, oi;
264
+ TCGOpcode opc;
265
266
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
267
memop = tcg_canonicalize_memop(memop, 0, 1);
268
- oi = make_memop_idx(memop, idx);
269
+ orig_oi = oi = make_memop_idx(memop, idx);
270
271
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
272
swap = tcg_temp_ebb_new_i32();
273
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
274
}
275
val = swap;
276
memop &= ~MO_BSWAP;
277
+ oi = make_memop_idx(memop, idx);
278
}
279
280
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
281
- gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
282
+ opc = INDEX_op_qemu_st8_i32;
283
} else {
284
- gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
285
+ opc = INDEX_op_qemu_st_i32;
286
}
287
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
288
+ gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
289
+ plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
290
291
if (swap) {
292
tcg_temp_free_i32(swap);
293
}
294
}
295
296
-void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
297
+void tcg_gen_qemu_st_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
298
+ MemOp memop, TCGType addr_type)
299
+{
300
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
301
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
302
+ tcg_gen_qemu_st_i32_int(val, addr, idx, memop);
303
+}
304
+
305
+static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
306
+ TCGArg idx, MemOp memop)
307
{
308
MemOp orig_memop;
309
- MemOpIdx oi;
310
+ MemOpIdx orig_oi, oi;
311
TCGv_i64 copy_addr;
312
313
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
314
- tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
315
+ tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
316
if (memop & MO_SIGN) {
317
tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
318
} else {
319
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
320
}
321
322
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
323
- memop = tcg_canonicalize_memop(memop, 1, 0);
324
- oi = make_memop_idx(memop, idx);
325
+ orig_memop = memop = tcg_canonicalize_memop(memop, 1, 0);
326
+ orig_oi = oi = make_memop_idx(memop, idx);
327
328
- orig_memop = memop;
329
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
330
memop &= ~MO_BSWAP;
331
/* The bswap primitive benefits from zero-extended input. */
332
if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
333
memop &= ~MO_SIGN;
334
}
335
+ oi = make_memop_idx(memop, idx);
336
}
337
338
copy_addr = plugin_maybe_preserve_addr(addr);
339
- gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
340
- plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
341
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, oi);
342
+ plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
343
344
if ((orig_memop ^ memop) & MO_BSWAP) {
345
int flags = (orig_memop & MO_SIGN
346
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
347
}
348
}
349
350
-void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
351
+void tcg_gen_qemu_ld_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
352
+ MemOp memop, TCGType addr_type)
353
+{
354
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
355
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
356
+ tcg_gen_qemu_ld_i64_int(val, addr, idx, memop);
357
+}
358
+
359
+static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
360
+ TCGArg idx, MemOp memop)
361
{
362
TCGv_i64 swap = NULL;
363
- MemOpIdx oi;
364
+ MemOpIdx orig_oi, oi;
365
366
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
367
- tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
368
+ tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
369
return;
370
}
371
372
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
373
memop = tcg_canonicalize_memop(memop, 1, 1);
374
- oi = make_memop_idx(memop, idx);
375
+ orig_oi = oi = make_memop_idx(memop, idx);
376
377
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
378
swap = tcg_temp_ebb_new_i64();
379
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
380
}
381
val = swap;
382
memop &= ~MO_BSWAP;
383
+ oi = make_memop_idx(memop, idx);
384
}
385
386
- gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
387
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
388
+ gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, oi);
389
+ plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
390
391
if (swap) {
392
tcg_temp_free_i64(swap);
393
}
394
}
395
396
+void tcg_gen_qemu_st_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
397
+ MemOp memop, TCGType addr_type)
398
+{
399
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
400
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
401
+ tcg_gen_qemu_st_i64_int(val, addr, idx, memop);
402
+}
403
+
404
/*
405
* Return true if @mop, without knowledge of the pointer alignment,
406
* does not require 16-byte atomicity, and it would be adventagous
407
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
408
{
409
MemOp mop_1 = orig, mop_2;
410
411
- tcg_debug_assert((orig & MO_SIZE) == MO_128);
412
- tcg_debug_assert((orig & MO_SIGN) == 0);
413
-
414
/* Reduce the size to 64-bit. */
415
mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
416
417
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
418
ret[1] = mop_2;
419
}
420
421
-#if TARGET_LONG_BITS == 64
422
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
423
-#else
424
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
425
-#endif
426
-
427
static TCGv_i64 maybe_extend_addr64(TCGv addr)
428
{
429
#if TARGET_LONG_BITS == 32
430
@@ -XXX,XX +XXX,XX @@ static void maybe_free_addr64(TCGv_i64 a64)
431
#endif
432
}
433
434
-void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
435
+static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
436
+ TCGArg idx, MemOp memop)
437
{
438
- const MemOpIdx oi = make_memop_idx(memop, idx);
439
-
440
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
441
- tcg_debug_assert((memop & MO_SIGN) == 0);
442
+ const MemOpIdx orig_oi = make_memop_idx(memop, idx);
443
+ TCGv_i64 ext_addr = NULL;
444
445
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
446
447
/* TODO: For now, force 32-bit hosts to use the helper. */
448
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
449
TCGv_i64 lo, hi;
450
- TCGArg addr_arg;
451
- MemOpIdx adj_oi;
452
bool need_bswap = false;
453
+ MemOpIdx oi = orig_oi;
454
455
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
456
lo = TCGV128_HIGH(val);
457
hi = TCGV128_LOW(val);
458
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
459
+ oi = make_memop_idx(memop & ~MO_BSWAP, idx);
460
need_bswap = true;
461
} else {
462
lo = TCGV128_LOW(val);
463
hi = TCGV128_HIGH(val);
464
- adj_oi = oi;
465
}
466
467
-#if TARGET_LONG_BITS == 32
468
- addr_arg = tcgv_i32_arg(addr);
469
-#else
470
- addr_arg = tcgv_i64_arg(addr);
471
-#endif
472
- tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
473
+ gen_ldst(INDEX_op_qemu_ld_i128, tcgv_i64_temp(lo),
474
+ tcgv_i64_temp(hi), addr, oi);
475
476
if (need_bswap) {
477
tcg_gen_bswap64_i64(lo, lo);
478
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
479
}
480
} else if (use_two_i64_for_i128(memop)) {
481
MemOp mop[2];
482
- TCGv addr_p8;
483
+ TCGTemp *addr_p8;
484
TCGv_i64 x, y;
485
+ MemOpIdx oi;
486
+ bool need_bswap;
487
488
canonicalize_memop_i128_as_i64(mop, memop);
489
+ need_bswap = (mop[0] ^ memop) & MO_BSWAP;
490
491
/*
492
* Since there are no global TCGv_i128, there is no visible state
493
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
494
y = TCGV128_LOW(val);
495
}
496
497
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
498
+ oi = make_memop_idx(mop[0], idx);
499
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, oi);
500
501
- if ((mop[0] ^ memop) & MO_BSWAP) {
502
+ if (need_bswap) {
503
tcg_gen_bswap64_i64(x, x);
504
}
505
506
- addr_p8 = tcg_temp_ebb_new();
507
- tcg_gen_addi_tl(addr_p8, addr, 8);
508
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
509
- tcg_temp_free(addr_p8);
510
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
511
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
512
+ tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
513
+ addr_p8 = tcgv_i32_temp(t);
514
+ } else {
515
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
516
+ tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
517
+ addr_p8 = tcgv_i64_temp(t);
518
+ }
519
520
- if ((mop[0] ^ memop) & MO_BSWAP) {
521
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, oi);
522
+ tcg_temp_free_internal(addr_p8);
523
+
524
+ if (need_bswap) {
525
tcg_gen_bswap64_i64(y, y);
526
}
527
} else {
528
- TCGv_i64 a64 = maybe_extend_addr64(addr);
529
- gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
530
- maybe_free_addr64(a64);
531
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
532
+ ext_addr = tcg_temp_ebb_new_i64();
533
+ tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
534
+ addr = tcgv_i64_temp(ext_addr);
535
+ }
536
+ gen_helper_ld_i128(val, cpu_env, temp_tcgv_i64(addr),
537
+ tcg_constant_i32(orig_oi));
538
}
539
540
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_R);
541
+ plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
542
}
543
544
-void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
545
+void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
546
+ MemOp memop, TCGType addr_type)
547
{
548
- const MemOpIdx oi = make_memop_idx(memop, idx);
549
-
550
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
551
tcg_debug_assert((memop & MO_SIZE) == MO_128);
552
tcg_debug_assert((memop & MO_SIGN) == 0);
553
+ tcg_gen_qemu_ld_i128_int(val, addr, idx, memop);
554
+}
555
+
556
+static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
557
+ TCGArg idx, MemOp memop)
558
+{
559
+ const MemOpIdx orig_oi = make_memop_idx(memop, idx);
560
+ TCGv_i64 ext_addr = NULL;
561
562
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
563
564
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
565
566
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
567
TCGv_i64 lo, hi;
568
- TCGArg addr_arg;
569
- MemOpIdx adj_oi;
570
+ MemOpIdx oi = orig_oi;
571
bool need_bswap = false;
572
573
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
574
- lo = tcg_temp_new_i64();
575
- hi = tcg_temp_new_i64();
576
+ lo = tcg_temp_ebb_new_i64();
577
+ hi = tcg_temp_ebb_new_i64();
578
tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
579
tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
580
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
581
+ oi = make_memop_idx(memop & ~MO_BSWAP, idx);
582
need_bswap = true;
583
} else {
584
lo = TCGV128_LOW(val);
585
hi = TCGV128_HIGH(val);
586
- adj_oi = oi;
587
}
588
589
-#if TARGET_LONG_BITS == 32
590
- addr_arg = tcgv_i32_arg(addr);
591
-#else
592
- addr_arg = tcgv_i64_arg(addr);
593
-#endif
594
- tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
595
+ gen_ldst(INDEX_op_qemu_st_i128, tcgv_i64_temp(lo),
596
+ tcgv_i64_temp(hi), addr, oi);
597
598
if (need_bswap) {
599
tcg_temp_free_i64(lo);
600
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
601
}
602
} else if (use_two_i64_for_i128(memop)) {
603
MemOp mop[2];
604
- TCGv addr_p8;
605
- TCGv_i64 x, y;
606
+ TCGTemp *addr_p8;
607
+ TCGv_i64 x, y, b = NULL;
608
609
canonicalize_memop_i128_as_i64(mop, memop);
610
611
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
612
y = TCGV128_LOW(val);
613
}
614
615
- addr_p8 = tcg_temp_ebb_new();
616
if ((mop[0] ^ memop) & MO_BSWAP) {
617
- TCGv_i64 t = tcg_temp_ebb_new_i64();
618
-
619
- tcg_gen_bswap64_i64(t, x);
620
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
621
- tcg_gen_bswap64_i64(t, y);
622
- tcg_gen_addi_tl(addr_p8, addr, 8);
623
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
624
- tcg_temp_free_i64(t);
625
- } else {
626
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
627
- tcg_gen_addi_tl(addr_p8, addr, 8);
628
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
629
+ b = tcg_temp_ebb_new_i64();
630
+ tcg_gen_bswap64_i64(b, x);
631
+ x = b;
632
}
633
- tcg_temp_free(addr_p8);
634
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr,
635
+ make_memop_idx(mop[0], idx));
636
+
637
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
638
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
639
+ tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
640
+ addr_p8 = tcgv_i32_temp(t);
641
+ } else {
642
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
643
+ tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
644
+ addr_p8 = tcgv_i64_temp(t);
645
+ }
646
+
647
+ if (b) {
648
+ tcg_gen_bswap64_i64(b, y);
649
+ y = b;
650
+ }
651
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8,
652
+ make_memop_idx(mop[1], idx));
653
+
654
+ if (b) {
655
+ tcg_temp_free_i64(b);
656
+ }
657
+ tcg_temp_free_internal(addr_p8);
658
} else {
659
- TCGv_i64 a64 = maybe_extend_addr64(addr);
660
- gen_helper_st_i128(cpu_env, a64, val, tcg_constant_i32(oi));
661
- maybe_free_addr64(a64);
662
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
663
+ ext_addr = tcg_temp_ebb_new_i64();
664
+ tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
665
+ addr = tcgv_i64_temp(ext_addr);
666
+ }
667
+ gen_helper_st_i128(cpu_env, temp_tcgv_i64(addr), val,
668
+ tcg_constant_i32(orig_oi));
669
}
670
671
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
672
+ plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_W);
673
+}
674
+
675
+void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
676
+ MemOp memop, TCGType addr_type)
677
+{
678
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
679
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
680
+ tcg_debug_assert((memop & MO_SIGN) == 0);
681
+ tcg_gen_qemu_st_i128_int(val, addr, idx, memop);
682
}
683
684
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
685
--
686
2.34.1
687
688
New patch
1
Expand from TCGv to TCGTemp inline in the translators,
2
and validate that the size matches tcg_ctx->addr_type.
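
For reference, a minimal sketch of what the DEF_ATOMIC2 wrapper macro
introduced below expands to, using tcg_gen_atomic_xchg_i32 as the example
(the real definitions are in include/tcg/tcg-op.h in this patch): the old
TCGv entry point becomes a static inline that converts the address to a
TCGTemp and passes the compile-time address type, which the new _chk
function asserts against tcg_ctx->addr_type.

    /* Sketch: expansion of DEF_ATOMIC2(tcg_gen_atomic_xchg, i32). */
    static inline void tcg_gen_atomic_xchg_i32(TCGv_i32 r, TCGv a, TCGv_i32 v,
                                               TCGArg i, MemOp m)
    {
        tcg_gen_atomic_xchg_i32_chk(r, tcgv_tl_temp(a), v, i, m, TCG_TYPE_TL);
    }
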
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/tcg/tcg-op.h | 184 ++++++++++++++++++++++++++++++----------
8
tcg/tcg-op-ldst.c | 198 ++++++++++++++++++++++++++++---------------
9
2 files changed, 267 insertions(+), 115 deletions(-)
10
11
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/include/tcg/tcg-op.h
14
+++ b/include/tcg/tcg-op.h
15
@@ -XXX,XX +XXX,XX @@ tcg_gen_qemu_st_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
16
tcg_gen_qemu_st_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
17
}
18
19
-void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
20
- TCGArg, MemOp);
21
-void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
22
- TCGArg, MemOp);
23
-void tcg_gen_atomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
24
- TCGArg, MemOp);
25
+void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGv_i32,
26
+ TCGArg, MemOp, TCGType);
27
+void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGv_i64,
28
+ TCGArg, MemOp, TCGType);
29
+void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128,
30
+ TCGv_i128, TCGArg, MemOp, TCGType);
31
32
-void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
33
- TCGArg, MemOp);
34
-void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
35
- TCGArg, MemOp);
36
-void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
37
- TCGArg, MemOp);
38
+void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGv_i32,
39
+ TCGArg, MemOp, TCGType);
40
+void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGv_i64,
41
+ TCGArg, MemOp, TCGType);
42
+void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128,
43
+ TCGv_i128, TCGArg, MemOp, TCGType);
44
45
-void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
46
-void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
47
+void tcg_gen_atomic_xchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
48
+ TCGArg, MemOp, TCGType);
49
+void tcg_gen_atomic_xchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
50
+ TCGArg, MemOp, TCGType);
51
52
-void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
53
-void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
54
-void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
55
-void tcg_gen_atomic_fetch_and_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
56
-void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
57
-void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
58
-void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
59
-void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
60
-void tcg_gen_atomic_fetch_smin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
61
-void tcg_gen_atomic_fetch_smin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
62
-void tcg_gen_atomic_fetch_umin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
63
-void tcg_gen_atomic_fetch_umin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
64
-void tcg_gen_atomic_fetch_smax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
65
-void tcg_gen_atomic_fetch_smax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
66
-void tcg_gen_atomic_fetch_umax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
67
-void tcg_gen_atomic_fetch_umax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
68
+void tcg_gen_atomic_fetch_add_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
69
+ TCGArg, MemOp, TCGType);
70
+void tcg_gen_atomic_fetch_add_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
71
+ TCGArg, MemOp, TCGType);
72
+void tcg_gen_atomic_fetch_and_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
73
+ TCGArg, MemOp, TCGType);
74
+void tcg_gen_atomic_fetch_and_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
75
+ TCGArg, MemOp, TCGType);
76
+void tcg_gen_atomic_fetch_or_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
77
+ TCGArg, MemOp, TCGType);
78
+void tcg_gen_atomic_fetch_or_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
79
+ TCGArg, MemOp, TCGType);
80
+void tcg_gen_atomic_fetch_xor_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
81
+ TCGArg, MemOp, TCGType);
82
+void tcg_gen_atomic_fetch_xor_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
83
+ TCGArg, MemOp, TCGType);
84
+void tcg_gen_atomic_fetch_smin_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
85
+ TCGArg, MemOp, TCGType);
86
+void tcg_gen_atomic_fetch_smin_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
87
+ TCGArg, MemOp, TCGType);
88
+void tcg_gen_atomic_fetch_umin_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
89
+ TCGArg, MemOp, TCGType);
90
+void tcg_gen_atomic_fetch_umin_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
91
+ TCGArg, MemOp, TCGType);
92
+void tcg_gen_atomic_fetch_smax_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
93
+ TCGArg, MemOp, TCGType);
94
+void tcg_gen_atomic_fetch_smax_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
95
+ TCGArg, MemOp, TCGType);
96
+void tcg_gen_atomic_fetch_umax_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
97
+ TCGArg, MemOp, TCGType);
98
+void tcg_gen_atomic_fetch_umax_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
99
+ TCGArg, MemOp, TCGType);
100
101
-void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
102
-void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
103
-void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
104
-void tcg_gen_atomic_and_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
105
-void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
106
-void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
107
-void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
108
-void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
109
-void tcg_gen_atomic_smin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
110
-void tcg_gen_atomic_smin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
111
-void tcg_gen_atomic_umin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
112
-void tcg_gen_atomic_umin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
113
-void tcg_gen_atomic_smax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
114
-void tcg_gen_atomic_smax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
115
-void tcg_gen_atomic_umax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
116
-void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
117
+void tcg_gen_atomic_add_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
118
+ TCGArg, MemOp, TCGType);
119
+void tcg_gen_atomic_add_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
120
+ TCGArg, MemOp, TCGType);
121
+void tcg_gen_atomic_and_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
122
+ TCGArg, MemOp, TCGType);
123
+void tcg_gen_atomic_and_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
124
+ TCGArg, MemOp, TCGType);
125
+void tcg_gen_atomic_or_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
126
+ TCGArg, MemOp, TCGType);
127
+void tcg_gen_atomic_or_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
128
+ TCGArg, MemOp, TCGType);
129
+void tcg_gen_atomic_xor_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
130
+ TCGArg, MemOp, TCGType);
131
+void tcg_gen_atomic_xor_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
132
+ TCGArg, MemOp, TCGType);
133
+void tcg_gen_atomic_smin_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
134
+ TCGArg, MemOp, TCGType);
135
+void tcg_gen_atomic_smin_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
136
+ TCGArg, MemOp, TCGType);
137
+void tcg_gen_atomic_umin_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
138
+ TCGArg, MemOp, TCGType);
139
+void tcg_gen_atomic_umin_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
140
+ TCGArg, MemOp, TCGType);
141
+void tcg_gen_atomic_smax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
142
+ TCGArg, MemOp, TCGType);
143
+void tcg_gen_atomic_smax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
144
+ TCGArg, MemOp, TCGType);
145
+void tcg_gen_atomic_umax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
146
+ TCGArg, MemOp, TCGType);
147
+void tcg_gen_atomic_umax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
148
+ TCGArg, MemOp, TCGType);
149
+
150
+#define DEF_ATOMIC2(N, S) \
151
+ static inline void N##_##S(TCGv_##S r, TCGv a, TCGv_##S v, \
152
+ TCGArg i, MemOp m) \
153
+ { N##_##S##_chk(r, tcgv_tl_temp(a), v, i, m, TCG_TYPE_TL); }
154
+
155
+#define DEF_ATOMIC3(N, S) \
156
+ static inline void N##_##S(TCGv_##S r, TCGv a, TCGv_##S o, \
157
+ TCGv_##S n, TCGArg i, MemOp m) \
158
+ { N##_##S##_chk(r, tcgv_tl_temp(a), o, n, i, m, TCG_TYPE_TL); }
159
+
160
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i32)
161
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i64)
162
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i128)
163
+
164
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i32)
165
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i64)
166
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i128)
167
+
168
+DEF_ATOMIC2(tcg_gen_atomic_xchg, i32)
169
+DEF_ATOMIC2(tcg_gen_atomic_xchg, i64)
170
+
171
+DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i32)
172
+DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i64)
173
+DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i32)
174
+DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i64)
175
+DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i32)
176
+DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i64)
177
+DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i32)
178
+DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i64)
179
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i32)
180
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i64)
181
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umin, i32)
182
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umin, i64)
183
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smax, i32)
184
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smax, i64)
185
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umax, i32)
186
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umax, i64)
187
+
188
+DEF_ATOMIC2(tcg_gen_atomic_add_fetch, i32)
189
+DEF_ATOMIC2(tcg_gen_atomic_add_fetch, i64)
190
+DEF_ATOMIC2(tcg_gen_atomic_and_fetch, i32)
191
+DEF_ATOMIC2(tcg_gen_atomic_and_fetch, i64)
192
+DEF_ATOMIC2(tcg_gen_atomic_or_fetch, i32)
193
+DEF_ATOMIC2(tcg_gen_atomic_or_fetch, i64)
194
+DEF_ATOMIC2(tcg_gen_atomic_xor_fetch, i32)
195
+DEF_ATOMIC2(tcg_gen_atomic_xor_fetch, i64)
196
+DEF_ATOMIC2(tcg_gen_atomic_smin_fetch, i32)
197
+DEF_ATOMIC2(tcg_gen_atomic_smin_fetch, i64)
198
+DEF_ATOMIC2(tcg_gen_atomic_umin_fetch, i32)
199
+DEF_ATOMIC2(tcg_gen_atomic_umin_fetch, i64)
200
+DEF_ATOMIC2(tcg_gen_atomic_smax_fetch, i32)
201
+DEF_ATOMIC2(tcg_gen_atomic_smax_fetch, i64)
202
+DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i32)
203
+DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
204
+
205
+#undef DEF_ATOMIC2
206
+#undef DEF_ATOMIC3
207
208
void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
209
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
210
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
211
index XXXXXXX..XXXXXXX 100644
212
--- a/tcg/tcg-op-ldst.c
213
+++ b/tcg/tcg-op-ldst.c
214
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
215
ret[1] = mop_2;
216
}
217
218
-static TCGv_i64 maybe_extend_addr64(TCGv addr)
219
+static TCGv_i64 maybe_extend_addr64(TCGTemp *addr)
220
{
221
-#if TARGET_LONG_BITS == 32
222
- TCGv_i64 a64 = tcg_temp_ebb_new_i64();
223
- tcg_gen_extu_i32_i64(a64, addr);
224
- return a64;
225
-#else
226
- return addr;
227
-#endif
228
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
229
+ TCGv_i64 a64 = tcg_temp_ebb_new_i64();
230
+ tcg_gen_extu_i32_i64(a64, temp_tcgv_i32(addr));
231
+ return a64;
232
+ }
233
+ return temp_tcgv_i64(addr);
234
}
235
236
static void maybe_free_addr64(TCGv_i64 a64)
237
{
238
-#if TARGET_LONG_BITS == 32
239
- tcg_temp_free_i64(a64);
240
-#endif
241
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
242
+ tcg_temp_free_i64(a64);
243
+ }
244
}
245
246
static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
247
@@ -XXX,XX +XXX,XX @@ static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
248
WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
249
};
250
251
-void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
252
- TCGv_i32 newv, TCGArg idx, MemOp memop)
253
+static void tcg_gen_nonatomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
254
+ TCGv_i32 cmpv, TCGv_i32 newv,
255
+ TCGArg idx, MemOp memop)
256
{
257
TCGv_i32 t1 = tcg_temp_ebb_new_i32();
258
TCGv_i32 t2 = tcg_temp_ebb_new_i32();
259
260
tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
261
262
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
263
+ tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop & ~MO_SIGN);
264
tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
265
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
266
+ tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
267
tcg_temp_free_i32(t2);
268
269
if (memop & MO_SIGN) {
270
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
271
tcg_temp_free_i32(t1);
272
}
273
274
-void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
275
- TCGv_i32 newv, TCGArg idx, MemOp memop)
276
+void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
277
+ TCGv_i32 cmpv, TCGv_i32 newv,
278
+ TCGArg idx, MemOp memop,
279
+ TCGType addr_type)
280
+{
281
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
282
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
283
+ tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
284
+}
285
+
286
+static void tcg_gen_atomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
287
+ TCGv_i32 cmpv, TCGv_i32 newv,
288
+ TCGArg idx, MemOp memop)
289
{
290
gen_atomic_cx_i32 gen;
291
TCGv_i64 a64;
292
MemOpIdx oi;
293
294
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
295
- tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
296
+ tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
297
return;
298
}
299
300
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
301
}
302
}
303
304
-void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
305
- TCGv_i64 newv, TCGArg idx, MemOp memop)
306
+void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
307
+ TCGv_i32 cmpv, TCGv_i32 newv,
308
+ TCGArg idx, MemOp memop,
309
+ TCGType addr_type)
310
+{
311
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
312
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
313
+ tcg_gen_atomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
314
+}
315
+
316
+static void tcg_gen_nonatomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
317
+ TCGv_i64 cmpv, TCGv_i64 newv,
318
+ TCGArg idx, MemOp memop)
319
{
320
TCGv_i64 t1, t2;
321
322
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
323
- tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
324
- TCGV_LOW(newv), idx, memop);
325
+ tcg_gen_nonatomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
326
+ TCGV_LOW(newv), idx, memop);
327
if (memop & MO_SIGN) {
328
tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
329
} else {
330
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
331
332
tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
333
334
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
335
+ tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop & ~MO_SIGN);
336
tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
337
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
338
+ tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
339
tcg_temp_free_i64(t2);
340
341
if (memop & MO_SIGN) {
342
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
343
tcg_temp_free_i64(t1);
344
}
345
346
-void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
347
- TCGv_i64 newv, TCGArg idx, MemOp memop)
348
+void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
349
+ TCGv_i64 cmpv, TCGv_i64 newv,
350
+ TCGArg idx, MemOp memop,
351
+ TCGType addr_type)
352
+{
353
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
354
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
355
+ tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
356
+}
357
+
358
+static void tcg_gen_atomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
359
+ TCGv_i64 cmpv, TCGv_i64 newv,
360
+ TCGArg idx, MemOp memop)
361
{
362
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
363
- tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
364
+ tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
365
return;
366
}
367
368
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
369
}
370
371
if (TCG_TARGET_REG_BITS == 32) {
372
- tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
373
- TCGV_LOW(newv), idx, memop);
374
+ tcg_gen_atomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
375
+ TCGV_LOW(newv), idx, memop);
376
if (memop & MO_SIGN) {
377
tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
378
} else {
379
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
380
381
tcg_gen_extrl_i64_i32(c32, cmpv);
382
tcg_gen_extrl_i64_i32(n32, newv);
383
- tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
384
+ tcg_gen_atomic_cmpxchg_i32_int(r32, addr, c32, n32,
385
+ idx, memop & ~MO_SIGN);
386
tcg_temp_free_i32(c32);
387
tcg_temp_free_i32(n32);
388
389
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
390
}
391
}
392
393
-void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
394
- TCGv_i128 newv, TCGArg idx, MemOp memop)
395
+void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
396
+ TCGv_i64 cmpv, TCGv_i64 newv,
397
+ TCGArg idx, MemOp memop, TCGType addr_type)
398
+{
399
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
400
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
401
+ tcg_gen_atomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
402
+}
403
+
404
+static void tcg_gen_nonatomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
405
+ TCGv_i128 cmpv, TCGv_i128 newv,
406
+ TCGArg idx, MemOp memop)
407
{
408
if (TCG_TARGET_REG_BITS == 32) {
409
/* Inline expansion below is simply too large for 32-bit hosts. */
410
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
411
? gen_helper_nonatomic_cmpxchgo_le
412
: gen_helper_nonatomic_cmpxchgo_be);
413
MemOpIdx oi = make_memop_idx(memop, idx);
414
- TCGv_i64 a64;
415
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
416
417
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
418
- tcg_debug_assert((memop & MO_SIGN) == 0);
419
-
420
- a64 = maybe_extend_addr64(addr);
421
gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
422
maybe_free_addr64(a64);
423
} else {
424
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
425
TCGv_i64 t1 = tcg_temp_ebb_new_i64();
426
TCGv_i64 z = tcg_constant_i64(0);
427
428
- tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
429
+ tcg_gen_qemu_ld_i128_int(oldv, addr, idx, memop);
430
431
/* Compare i128 */
432
tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
433
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
434
TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
435
436
/* Unconditional writeback. */
437
- tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
438
+ tcg_gen_qemu_st_i128_int(tmpv, addr, idx, memop);
439
tcg_gen_mov_i128(retv, oldv);
440
441
tcg_temp_free_i64(t0);
442
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
443
}
444
}
445
446
-void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
447
- TCGv_i128 newv, TCGArg idx, MemOp memop)
448
+void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
449
+ TCGv_i128 cmpv, TCGv_i128 newv,
450
+ TCGArg idx, MemOp memop,
451
+ TCGType addr_type)
452
+{
453
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
454
+ tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
455
+ tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
456
+}
457
+
458
+static void tcg_gen_atomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
459
+ TCGv_i128 cmpv, TCGv_i128 newv,
460
+ TCGArg idx, MemOp memop)
461
{
462
gen_atomic_cx_i128 gen;
463
464
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
465
- tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
466
+ tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
467
return;
468
}
469
470
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
471
- tcg_debug_assert((memop & MO_SIGN) == 0);
472
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
473
-
474
if (gen) {
475
MemOpIdx oi = make_memop_idx(memop, idx);
476
TCGv_i64 a64 = maybe_extend_addr64(addr);
477
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
478
tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
479
}
480
481
-static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
482
+void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
483
+ TCGv_i128 cmpv, TCGv_i128 newv,
484
+ TCGArg idx, MemOp memop,
485
+ TCGType addr_type)
486
+{
487
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
488
+ tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
489
+ tcg_gen_atomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
490
+}
491
+
492
+static void do_nonatomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
493
TCGArg idx, MemOp memop, bool new_val,
494
void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
495
{
496
@@ -XXX,XX +XXX,XX @@ static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
497
498
memop = tcg_canonicalize_memop(memop, 0, 0);
499
500
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
501
+ tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop);
502
tcg_gen_ext_i32(t2, val, memop);
503
gen(t2, t1, t2);
504
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
505
+ tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
506
507
tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
508
tcg_temp_free_i32(t1);
509
tcg_temp_free_i32(t2);
510
}
511
512
-static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
513
+static void do_atomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
514
TCGArg idx, MemOp memop, void * const table[])
515
{
516
gen_atomic_op_i32 gen;
517
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
518
}
519
}
520
521
-static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
522
+static void do_nonatomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
523
TCGArg idx, MemOp memop, bool new_val,
524
void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
525
{
526
@@ -XXX,XX +XXX,XX @@ static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
527
528
memop = tcg_canonicalize_memop(memop, 1, 0);
529
530
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
531
+ tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop);
532
tcg_gen_ext_i64(t2, val, memop);
533
gen(t2, t1, t2);
534
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
535
+ tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
536
537
tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
538
tcg_temp_free_i64(t1);
539
tcg_temp_free_i64(t2);
540
}
541
542
-static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
543
+static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
544
TCGArg idx, MemOp memop, void * const table[])
545
{
546
memop = tcg_canonicalize_memop(memop, 1, 0);
547
548
if ((memop & MO_SIZE) == MO_64) {
549
-#ifdef CONFIG_ATOMIC64
550
- gen_atomic_op_i64 gen;
551
- TCGv_i64 a64;
552
- MemOpIdx oi;
553
+ gen_atomic_op_i64 gen = table[memop & (MO_SIZE | MO_BSWAP)];
554
555
- gen = table[memop & (MO_SIZE | MO_BSWAP)];
556
- tcg_debug_assert(gen != NULL);
557
+ if (gen) {
558
+ MemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
559
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
560
+ gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
561
+ maybe_free_addr64(a64);
562
+ return;
563
+ }
564
565
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
566
- a64 = maybe_extend_addr64(addr);
567
- gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
568
- maybe_free_addr64(a64);
569
-#else
570
gen_helper_exit_atomic(cpu_env);
571
/* Produce a result, so that we have a well-formed opcode stream
572
with respect to uses of the result in the (dead) code following. */
573
tcg_gen_movi_i64(ret, 0);
574
-#endif /* CONFIG_ATOMIC64 */
575
} else {
576
TCGv_i32 v32 = tcg_temp_ebb_new_i32();
577
TCGv_i32 r32 = tcg_temp_ebb_new_i32();
578
@@ -XXX,XX +XXX,XX @@ static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
579
WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
580
WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
581
}; \
582
-void tcg_gen_atomic_##NAME##_i32 \
583
- (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
584
+void tcg_gen_atomic_##NAME##_i32_chk(TCGv_i32 ret, TCGTemp *addr, \
585
+ TCGv_i32 val, TCGArg idx, \
586
+ MemOp memop, TCGType addr_type) \
587
{ \
588
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type); \
589
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32); \
590
if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
591
do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
592
} else { \
593
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_##NAME##_i32 \
594
tcg_gen_##OP##_i32); \
595
} \
596
} \
597
-void tcg_gen_atomic_##NAME##_i64 \
598
- (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
599
+void tcg_gen_atomic_##NAME##_i64_chk(TCGv_i64 ret, TCGTemp *addr, \
600
+ TCGv_i64 val, TCGArg idx, \
601
+ MemOp memop, TCGType addr_type) \
602
{ \
603
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type); \
604
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64); \
605
if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
606
do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
607
} else { \
608
--
609
2.34.1
610
611
New patch
1
For 32-bit hosts, we cannot simply rely on TCGContext.addr_bits,
2
as we need one or two host registers to represent the guest address.
1
3
4
Create the new opcodes and update all users. Since we have not
5
yet eliminated TARGET_LONG_BITS, only one of the two opcodes will
6
ever be used, so we can get away with treating them the same in
7
the backends.
8
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
include/tcg/tcg-opc.h | 35 ++++++++----
13
tcg/optimize.c | 19 +++++--
14
tcg/tcg-op-ldst.c | 83 ++++++++++++++++++++++-------
15
tcg/tcg.c | 42 ++++++++++-----
16
tcg/tci.c | 32 +++++++----
17
tcg/aarch64/tcg-target.c.inc | 36 ++++++++-----
18
tcg/arm/tcg-target.c.inc | 83 +++++++++++++++--------------
19
tcg/i386/tcg-target.c.inc | 91 ++++++++++++++++++++------------
20
tcg/loongarch64/tcg-target.c.inc | 24 ++++++---
21
tcg/mips/tcg-target.c.inc | 66 ++++++++++++++---------
22
tcg/ppc/tcg-target.c.inc | 91 +++++++++++++++++++-------------
23
tcg/riscv/tcg-target.c.inc | 24 ++++++---
24
tcg/s390x/tcg-target.c.inc | 36 ++++++++-----
25
tcg/sparc64/tcg-target.c.inc | 24 ++++++---
26
tcg/tci/tcg-target.c.inc | 44 ++++++++-------
27
15 files changed, 468 insertions(+), 262 deletions(-)
28
29
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/include/tcg/tcg-opc.h
32
+++ b/include/tcg/tcg-opc.h
33
@@ -XXX,XX +XXX,XX @@ DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
34
DEF(muluh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muluh_i64))
35
DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
36
37
-#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
38
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
39
40
/* QEMU specific */
41
@@ -XXX,XX +XXX,XX @@ DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
42
DEF(plugin_cb_start, 0, 0, 3, TCG_OPF_NOT_PRESENT)
43
DEF(plugin_cb_end, 0, 0, 0, TCG_OPF_NOT_PRESENT)
44
45
-DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
46
+/* Replicate ld/st ops for 32 and 64-bit guest addresses. */
47
+DEF(qemu_ld_a32_i32, 1, 1, 1,
48
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
49
-DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1,
50
+DEF(qemu_st_a32_i32, 0, 1 + 1, 1,
51
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
52
-DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
53
+DEF(qemu_ld_a32_i64, DATA64_ARGS, 1, 1,
54
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
55
-DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
56
+DEF(qemu_st_a32_i64, 0, DATA64_ARGS + 1, 1,
57
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
58
+
59
+DEF(qemu_ld_a64_i32, 1, DATA64_ARGS, 1,
60
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
61
+DEF(qemu_st_a64_i32, 0, 1 + DATA64_ARGS, 1,
62
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
63
+DEF(qemu_ld_a64_i64, DATA64_ARGS, DATA64_ARGS, 1,
64
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
65
+DEF(qemu_st_a64_i64, 0, DATA64_ARGS + DATA64_ARGS, 1,
66
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
67
68
/* Only used by i386 to cope with stupid register constraints. */
69
-DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
70
+DEF(qemu_st8_a32_i32, 0, 1 + 1, 1,
71
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
72
+ IMPL(TCG_TARGET_HAS_qemu_st8_i32))
73
+DEF(qemu_st8_a64_i32, 0, 1 + DATA64_ARGS, 1,
74
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
75
IMPL(TCG_TARGET_HAS_qemu_st8_i32))
76
77
/* Only for 64-bit hosts at the moment. */
78
-DEF(qemu_ld_i128, 2, 1, 1,
79
+DEF(qemu_ld_a32_i128, 2, 1, 1,
80
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
81
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
82
-DEF(qemu_st_i128, 0, 3, 1,
83
+DEF(qemu_ld_a64_i128, 2, 1, 1,
84
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
85
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
86
+DEF(qemu_st_a32_i128, 0, 3, 1,
87
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
88
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
89
+DEF(qemu_st_a64_i128, 0, 3, 1,
90
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
91
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
92
93
@@ -XXX,XX +XXX,XX @@ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
94
DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
95
#endif
96
97
-#undef TLADDR_ARGS
98
#undef DATA64_ARGS
99
#undef IMPL
100
#undef IMPL64
101
diff --git a/tcg/optimize.c b/tcg/optimize.c
102
index XXXXXXX..XXXXXXX 100644
103
--- a/tcg/optimize.c
104
+++ b/tcg/optimize.c
105
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
106
CASE_OP_32_64_VEC(orc):
107
done = fold_orc(&ctx, op);
108
break;
109
- case INDEX_op_qemu_ld_i32:
110
- case INDEX_op_qemu_ld_i64:
111
+ case INDEX_op_qemu_ld_a32_i32:
112
+ case INDEX_op_qemu_ld_a64_i32:
113
+ case INDEX_op_qemu_ld_a32_i64:
114
+ case INDEX_op_qemu_ld_a64_i64:
115
+ case INDEX_op_qemu_ld_a32_i128:
116
+ case INDEX_op_qemu_ld_a64_i128:
117
done = fold_qemu_ld(&ctx, op);
118
break;
119
- case INDEX_op_qemu_st_i32:
120
- case INDEX_op_qemu_st8_i32:
121
- case INDEX_op_qemu_st_i64:
122
+ case INDEX_op_qemu_st8_a32_i32:
123
+ case INDEX_op_qemu_st8_a64_i32:
124
+ case INDEX_op_qemu_st_a32_i32:
125
+ case INDEX_op_qemu_st_a64_i32:
126
+ case INDEX_op_qemu_st_a32_i64:
127
+ case INDEX_op_qemu_st_a64_i64:
128
+ case INDEX_op_qemu_st_a32_i128:
129
+ case INDEX_op_qemu_st_a64_i128:
130
done = fold_qemu_st(&ctx, op);
131
break;
132
CASE_OP_32_64(rem):
133
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/tcg/tcg-op-ldst.c
136
+++ b/tcg/tcg-op-ldst.c
137
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
138
MemOp orig_memop;
139
MemOpIdx orig_oi, oi;
140
TCGv_i64 copy_addr;
141
+ TCGOpcode opc;
142
143
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
144
orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
145
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
146
}
147
148
copy_addr = plugin_maybe_preserve_addr(addr);
149
- gen_ldst(INDEX_op_qemu_ld_i32, tcgv_i32_temp(val), NULL, addr, oi);
150
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
151
+ opc = INDEX_op_qemu_ld_a32_i32;
152
+ } else {
153
+ opc = INDEX_op_qemu_ld_a64_i32;
154
+ }
155
+ gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
156
plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
157
158
if ((orig_memop ^ memop) & MO_BSWAP) {
159
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
160
}
161
162
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
163
- opc = INDEX_op_qemu_st8_i32;
164
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
165
+ opc = INDEX_op_qemu_st8_a32_i32;
166
+ } else {
167
+ opc = INDEX_op_qemu_st8_a64_i32;
168
+ }
169
} else {
170
- opc = INDEX_op_qemu_st_i32;
171
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
172
+ opc = INDEX_op_qemu_st_a32_i32;
173
+ } else {
174
+ opc = INDEX_op_qemu_st_a64_i32;
175
+ }
176
}
177
gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
178
plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
179
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
180
MemOp orig_memop;
181
MemOpIdx orig_oi, oi;
182
TCGv_i64 copy_addr;
183
+ TCGOpcode opc;
184
185
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
186
tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
187
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
188
}
189
190
copy_addr = plugin_maybe_preserve_addr(addr);
191
- gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, oi);
192
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
193
+ opc = INDEX_op_qemu_ld_a32_i64;
194
+ } else {
195
+ opc = INDEX_op_qemu_ld_a64_i64;
196
+ }
197
+ gen_ldst_i64(opc, val, addr, oi);
198
plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
199
200
if ((orig_memop ^ memop) & MO_BSWAP) {
201
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
202
{
203
TCGv_i64 swap = NULL;
204
MemOpIdx orig_oi, oi;
205
+ TCGOpcode opc;
206
207
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
208
tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
209
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
210
oi = make_memop_idx(memop, idx);
211
}
212
213
- gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, oi);
214
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
215
+ opc = INDEX_op_qemu_st_a32_i64;
216
+ } else {
217
+ opc = INDEX_op_qemu_st_a64_i64;
218
+ }
219
+ gen_ldst_i64(opc, val, addr, oi);
220
plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
221
222
if (swap) {
223
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
224
{
225
const MemOpIdx orig_oi = make_memop_idx(memop, idx);
226
TCGv_i64 ext_addr = NULL;
227
+ TCGOpcode opc;
228
229
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
230
231
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
232
hi = TCGV128_HIGH(val);
233
}
234
235
- gen_ldst(INDEX_op_qemu_ld_i128, tcgv_i64_temp(lo),
236
- tcgv_i64_temp(hi), addr, oi);
237
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
238
+ opc = INDEX_op_qemu_ld_a32_i128;
239
+ } else {
240
+ opc = INDEX_op_qemu_ld_a64_i128;
241
+ }
242
+ gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
243
244
if (need_bswap) {
245
tcg_gen_bswap64_i64(lo, lo);
246
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
247
canonicalize_memop_i128_as_i64(mop, memop);
248
need_bswap = (mop[0] ^ memop) & MO_BSWAP;
249
250
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
251
+ opc = INDEX_op_qemu_ld_a32_i64;
252
+ } else {
253
+ opc = INDEX_op_qemu_ld_a64_i64;
254
+ }
255
+
256
/*
257
* Since there are no global TCGv_i128, there is no visible state
258
* changed if the second load faults. Load directly into the two
259
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
260
}
261
262
oi = make_memop_idx(mop[0], idx);
263
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, oi);
264
+ gen_ldst_i64(opc, x, addr, oi);
265
266
if (need_bswap) {
267
tcg_gen_bswap64_i64(x, x);
268
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
269
addr_p8 = tcgv_i64_temp(t);
270
}
271
272
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, oi);
273
+ gen_ldst_i64(opc, y, addr_p8, oi);
274
tcg_temp_free_internal(addr_p8);
275
276
if (need_bswap) {
277
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
278
{
279
const MemOpIdx orig_oi = make_memop_idx(memop, idx);
280
TCGv_i64 ext_addr = NULL;
281
+ TCGOpcode opc;
282
283
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
284
285
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
286
hi = TCGV128_HIGH(val);
287
}
288
289
- gen_ldst(INDEX_op_qemu_st_i128, tcgv_i64_temp(lo),
290
- tcgv_i64_temp(hi), addr, oi);
291
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
292
+ opc = INDEX_op_qemu_st_a32_i128;
293
+ } else {
294
+ opc = INDEX_op_qemu_st_a64_i128;
295
+ }
296
+ gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
297
298
if (need_bswap) {
299
tcg_temp_free_i64(lo);
300
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
301
302
canonicalize_memop_i128_as_i64(mop, memop);
303
304
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
305
+ opc = INDEX_op_qemu_st_a32_i64;
306
+ } else {
307
+ opc = INDEX_op_qemu_st_a64_i64;
308
+ }
309
+
310
if ((memop & MO_BSWAP) == MO_LE) {
311
x = TCGV128_LOW(val);
312
y = TCGV128_HIGH(val);
313
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
314
tcg_gen_bswap64_i64(b, x);
315
x = b;
316
}
317
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr,
318
- make_memop_idx(mop[0], idx));
319
+
320
+ gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));
321
322
if (tcg_ctx->addr_type == TCG_TYPE_I32) {
323
TCGv_i32 t = tcg_temp_ebb_new_i32();
324
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
325
326
if (b) {
327
tcg_gen_bswap64_i64(b, y);
328
- y = b;
329
- }
330
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8,
331
- make_memop_idx(mop[1], idx));
332
-
333
- if (b) {
334
+ gen_ldst_i64(opc, b, addr_p8, make_memop_idx(mop[1], idx));
335
tcg_temp_free_i64(b);
336
+ } else {
337
+ gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
338
}
339
tcg_temp_free_internal(addr_p8);
340
} else {
341
diff --git a/tcg/tcg.c b/tcg/tcg.c
342
index XXXXXXX..XXXXXXX 100644
343
--- a/tcg/tcg.c
344
+++ b/tcg/tcg.c
345
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
346
case INDEX_op_exit_tb:
347
case INDEX_op_goto_tb:
348
case INDEX_op_goto_ptr:
349
- case INDEX_op_qemu_ld_i32:
350
- case INDEX_op_qemu_st_i32:
351
- case INDEX_op_qemu_ld_i64:
352
- case INDEX_op_qemu_st_i64:
353
+ case INDEX_op_qemu_ld_a32_i32:
354
+ case INDEX_op_qemu_ld_a64_i32:
355
+ case INDEX_op_qemu_st_a32_i32:
356
+ case INDEX_op_qemu_st_a64_i32:
357
+ case INDEX_op_qemu_ld_a32_i64:
358
+ case INDEX_op_qemu_ld_a64_i64:
359
+ case INDEX_op_qemu_st_a32_i64:
360
+ case INDEX_op_qemu_st_a64_i64:
361
return true;
362
363
- case INDEX_op_qemu_st8_i32:
364
+ case INDEX_op_qemu_st8_a32_i32:
365
+ case INDEX_op_qemu_st8_a64_i32:
366
return TCG_TARGET_HAS_qemu_st8_i32;
367
368
- case INDEX_op_qemu_ld_i128:
369
- case INDEX_op_qemu_st_i128:
370
+ case INDEX_op_qemu_ld_a32_i128:
371
+ case INDEX_op_qemu_ld_a64_i128:
372
+ case INDEX_op_qemu_st_a32_i128:
373
+ case INDEX_op_qemu_st_a64_i128:
374
return TCG_TARGET_HAS_qemu_ldst_i128;
375
376
case INDEX_op_mov_i32:
377
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
378
}
379
i = 1;
380
break;
381
- case INDEX_op_qemu_ld_i32:
382
- case INDEX_op_qemu_st_i32:
383
- case INDEX_op_qemu_st8_i32:
384
- case INDEX_op_qemu_ld_i64:
385
- case INDEX_op_qemu_st_i64:
386
- case INDEX_op_qemu_ld_i128:
387
- case INDEX_op_qemu_st_i128:
388
+ case INDEX_op_qemu_ld_a32_i32:
389
+ case INDEX_op_qemu_ld_a64_i32:
390
+ case INDEX_op_qemu_st_a32_i32:
391
+ case INDEX_op_qemu_st_a64_i32:
392
+ case INDEX_op_qemu_st8_a32_i32:
393
+ case INDEX_op_qemu_st8_a64_i32:
394
+ case INDEX_op_qemu_ld_a32_i64:
395
+ case INDEX_op_qemu_ld_a64_i64:
396
+ case INDEX_op_qemu_st_a32_i64:
397
+ case INDEX_op_qemu_st_a64_i64:
398
+ case INDEX_op_qemu_ld_a32_i128:
399
+ case INDEX_op_qemu_ld_a64_i128:
400
+ case INDEX_op_qemu_st_a32_i128:
401
+ case INDEX_op_qemu_st_a64_i128:
402
{
403
const char *s_al, *s_op, *s_at;
404
MemOpIdx oi = op->args[k++];
405
diff --git a/tcg/tci.c b/tcg/tci.c
406
index XXXXXXX..XXXXXXX 100644
407
--- a/tcg/tci.c
408
+++ b/tcg/tci.c
409
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
410
tb_ptr = ptr;
411
break;
412
413
- case INDEX_op_qemu_ld_i32:
414
+ case INDEX_op_qemu_ld_a32_i32:
415
+ case INDEX_op_qemu_ld_a64_i32:
416
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
417
tci_args_rrm(insn, &r0, &r1, &oi);
418
taddr = regs[r1];
419
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
420
regs[r0] = tmp32;
421
break;
422
423
- case INDEX_op_qemu_ld_i64:
424
+ case INDEX_op_qemu_ld_a32_i64:
425
+ case INDEX_op_qemu_ld_a64_i64:
426
if (TCG_TARGET_REG_BITS == 64) {
427
tci_args_rrm(insn, &r0, &r1, &oi);
428
taddr = regs[r1];
429
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
430
}
431
break;
432
433
- case INDEX_op_qemu_st_i32:
434
+ case INDEX_op_qemu_st_a32_i32:
435
+ case INDEX_op_qemu_st_a64_i32:
436
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
437
tci_args_rrm(insn, &r0, &r1, &oi);
438
taddr = regs[r1];
439
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
440
tci_qemu_st(env, taddr, tmp32, oi, tb_ptr);
441
break;
442
443
- case INDEX_op_qemu_st_i64:
444
+ case INDEX_op_qemu_st_a32_i64:
445
+ case INDEX_op_qemu_st_a64_i64:
446
if (TCG_TARGET_REG_BITS == 64) {
447
tci_args_rrm(insn, &r0, &r1, &oi);
448
taddr = regs[r1];
449
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
450
str_r(r3), str_r(r4), str_r(r5));
451
break;
452
453
- case INDEX_op_qemu_ld_i64:
454
- case INDEX_op_qemu_st_i64:
455
- len = DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
456
+ case INDEX_op_qemu_ld_a32_i32:
457
+ case INDEX_op_qemu_st_a32_i32:
458
+ len = 1 + 1;
459
+ goto do_qemu_ldst;
460
+ case INDEX_op_qemu_ld_a32_i64:
461
+ case INDEX_op_qemu_st_a32_i64:
462
+ case INDEX_op_qemu_ld_a64_i32:
463
+ case INDEX_op_qemu_st_a64_i32:
464
+ len = 1 + DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
465
+ goto do_qemu_ldst;
466
+ case INDEX_op_qemu_ld_a64_i64:
467
+ case INDEX_op_qemu_st_a64_i64:
468
+ len = 2 * DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
469
goto do_qemu_ldst;
470
- case INDEX_op_qemu_ld_i32:
471
- case INDEX_op_qemu_st_i32:
472
- len = 1;
473
do_qemu_ldst:
474
- len += DIV_ROUND_UP(TARGET_LONG_BITS, TCG_TARGET_REG_BITS);
475
switch (len) {
476
case 2:
477
tci_args_rrm(insn, &r0, &r1, &oi);
478
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
479
index XXXXXXX..XXXXXXX 100644
480
--- a/tcg/aarch64/tcg-target.c.inc
481
+++ b/tcg/aarch64/tcg-target.c.inc
482
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
483
tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
484
break;
485
486
- case INDEX_op_qemu_ld_i32:
487
- case INDEX_op_qemu_ld_i64:
488
+ case INDEX_op_qemu_ld_a32_i32:
489
+ case INDEX_op_qemu_ld_a64_i32:
490
+ case INDEX_op_qemu_ld_a32_i64:
491
+ case INDEX_op_qemu_ld_a64_i64:
492
tcg_out_qemu_ld(s, a0, a1, a2, ext);
493
break;
494
- case INDEX_op_qemu_st_i32:
495
- case INDEX_op_qemu_st_i64:
496
+ case INDEX_op_qemu_st_a32_i32:
497
+ case INDEX_op_qemu_st_a64_i32:
498
+ case INDEX_op_qemu_st_a32_i64:
499
+ case INDEX_op_qemu_st_a64_i64:
500
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
501
break;
502
- case INDEX_op_qemu_ld_i128:
503
+ case INDEX_op_qemu_ld_a32_i128:
504
+ case INDEX_op_qemu_ld_a64_i128:
505
tcg_out_qemu_ld128(s, a0, a1, a2, args[3]);
506
break;
507
- case INDEX_op_qemu_st_i128:
508
+ case INDEX_op_qemu_st_a32_i128:
509
+ case INDEX_op_qemu_st_a64_i128:
510
tcg_out_qemu_st128(s, REG0(0), REG0(1), a2, args[3]);
511
break;
512
513
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
514
case INDEX_op_movcond_i64:
515
return C_O1_I4(r, r, rA, rZ, rZ);
516
517
- case INDEX_op_qemu_ld_i32:
518
- case INDEX_op_qemu_ld_i64:
519
+ case INDEX_op_qemu_ld_a32_i32:
520
+ case INDEX_op_qemu_ld_a64_i32:
521
+ case INDEX_op_qemu_ld_a32_i64:
522
+ case INDEX_op_qemu_ld_a64_i64:
523
return C_O1_I1(r, l);
524
- case INDEX_op_qemu_ld_i128:
525
+ case INDEX_op_qemu_ld_a32_i128:
526
+ case INDEX_op_qemu_ld_a64_i128:
527
return C_O2_I1(r, r, l);
528
- case INDEX_op_qemu_st_i32:
529
- case INDEX_op_qemu_st_i64:
530
+ case INDEX_op_qemu_st_a32_i32:
531
+ case INDEX_op_qemu_st_a64_i32:
532
+ case INDEX_op_qemu_st_a32_i64:
533
+ case INDEX_op_qemu_st_a64_i64:
534
return C_O0_I2(lZ, l);
535
- case INDEX_op_qemu_st_i128:
536
+ case INDEX_op_qemu_st_a32_i128:
537
+ case INDEX_op_qemu_st_a64_i128:
538
return C_O0_I3(lZ, lZ, l);
539
540
case INDEX_op_deposit_i32:
541
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
542
index XXXXXXX..XXXXXXX 100644
543
--- a/tcg/arm/tcg-target.c.inc
544
+++ b/tcg/arm/tcg-target.c.inc
545
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
546
ARITH_MOV, args[0], 0, 0);
547
break;
548
549
- case INDEX_op_qemu_ld_i32:
550
- if (TARGET_LONG_BITS == 32) {
551
- tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
552
- args[2], TCG_TYPE_I32);
553
- } else {
554
- tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
555
- args[3], TCG_TYPE_I32);
556
- }
557
+ case INDEX_op_qemu_ld_a32_i32:
558
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
559
break;
560
- case INDEX_op_qemu_ld_i64:
561
- if (TARGET_LONG_BITS == 32) {
562
- tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
563
- args[3], TCG_TYPE_I64);
564
- } else {
565
- tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
566
- args[4], TCG_TYPE_I64);
567
- }
568
+ case INDEX_op_qemu_ld_a64_i32:
569
+ tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
570
+ args[3], TCG_TYPE_I32);
571
break;
572
- case INDEX_op_qemu_st_i32:
573
- if (TARGET_LONG_BITS == 32) {
574
- tcg_out_qemu_st(s, args[0], -1, args[1], -1,
575
- args[2], TCG_TYPE_I32);
576
- } else {
577
- tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
578
- args[3], TCG_TYPE_I32);
579
- }
580
+ case INDEX_op_qemu_ld_a32_i64:
581
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
582
+ args[3], TCG_TYPE_I64);
583
break;
584
- case INDEX_op_qemu_st_i64:
585
- if (TARGET_LONG_BITS == 32) {
586
- tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
587
- args[3], TCG_TYPE_I64);
588
- } else {
589
- tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
590
- args[4], TCG_TYPE_I64);
591
- }
592
+ case INDEX_op_qemu_ld_a64_i64:
593
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
594
+ args[4], TCG_TYPE_I64);
595
+ break;
596
+
597
+ case INDEX_op_qemu_st_a32_i32:
598
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
599
+ break;
600
+ case INDEX_op_qemu_st_a64_i32:
601
+ tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
602
+ args[3], TCG_TYPE_I32);
603
+ break;
604
+ case INDEX_op_qemu_st_a32_i64:
605
+ tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
606
+ args[3], TCG_TYPE_I64);
607
+ break;
608
+ case INDEX_op_qemu_st_a64_i64:
609
+ tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
610
+ args[4], TCG_TYPE_I64);
611
break;
612
613
case INDEX_op_bswap16_i32:
614
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
615
case INDEX_op_setcond2_i32:
616
return C_O1_I4(r, r, r, rI, rI);
617
618
- case INDEX_op_qemu_ld_i32:
619
- return TARGET_LONG_BITS == 32 ? C_O1_I1(r, q) : C_O1_I2(r, q, q);
620
- case INDEX_op_qemu_ld_i64:
621
- return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, q) : C_O2_I2(e, p, q, q);
622
- case INDEX_op_qemu_st_i32:
623
- return TARGET_LONG_BITS == 32 ? C_O0_I2(q, q) : C_O0_I3(q, q, q);
624
- case INDEX_op_qemu_st_i64:
625
- return TARGET_LONG_BITS == 32 ? C_O0_I3(Q, p, q) : C_O0_I4(Q, p, q, q);
626
+ case INDEX_op_qemu_ld_a32_i32:
627
+ return C_O1_I1(r, q);
628
+ case INDEX_op_qemu_ld_a64_i32:
629
+ return C_O1_I2(r, q, q);
630
+ case INDEX_op_qemu_ld_a32_i64:
631
+ return C_O2_I1(e, p, q);
632
+ case INDEX_op_qemu_ld_a64_i64:
633
+ return C_O2_I2(e, p, q, q);
634
+ case INDEX_op_qemu_st_a32_i32:
635
+ return C_O0_I2(q, q);
636
+ case INDEX_op_qemu_st_a64_i32:
637
+ return C_O0_I3(q, q, q);
638
+ case INDEX_op_qemu_st_a32_i64:
639
+ return C_O0_I3(Q, p, q);
640
+ case INDEX_op_qemu_st_a64_i64:
641
+ return C_O0_I4(Q, p, q, q);
642
643
case INDEX_op_st_vec:
644
return C_O0_I2(w, r);
645
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
646
index XXXXXXX..XXXXXXX 100644
647
--- a/tcg/i386/tcg-target.c.inc
648
+++ b/tcg/i386/tcg-target.c.inc
649
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
650
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
651
break;
652
653
- case INDEX_op_qemu_ld_i32:
654
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
655
- tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
656
- } else {
657
+ case INDEX_op_qemu_ld_a64_i32:
658
+ if (TCG_TARGET_REG_BITS == 32) {
659
tcg_out_qemu_ld(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
660
+ break;
661
}
662
+ /* fall through */
663
+ case INDEX_op_qemu_ld_a32_i32:
664
+ tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
665
break;
666
- case INDEX_op_qemu_ld_i64:
667
+ case INDEX_op_qemu_ld_a32_i64:
668
if (TCG_TARGET_REG_BITS == 64) {
669
tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
670
- } else if (TARGET_LONG_BITS == 32) {
671
+ } else {
672
tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
673
+ }
674
+ break;
675
+ case INDEX_op_qemu_ld_a64_i64:
676
+ if (TCG_TARGET_REG_BITS == 64) {
677
+ tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
678
} else {
679
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
680
}
681
break;
682
- case INDEX_op_qemu_ld_i128:
683
+ case INDEX_op_qemu_ld_a32_i128:
684
+ case INDEX_op_qemu_ld_a64_i128:
685
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
686
tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
687
break;
688
- case INDEX_op_qemu_st_i32:
689
- case INDEX_op_qemu_st8_i32:
690
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
691
- tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
692
- } else {
693
+
694
+ case INDEX_op_qemu_st_a64_i32:
695
+ case INDEX_op_qemu_st8_a64_i32:
696
+ if (TCG_TARGET_REG_BITS == 32) {
697
tcg_out_qemu_st(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
698
+ break;
699
}
700
+ /* fall through */
701
+ case INDEX_op_qemu_st_a32_i32:
702
+ case INDEX_op_qemu_st8_a32_i32:
703
+ tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
704
break;
705
- case INDEX_op_qemu_st_i64:
706
+ case INDEX_op_qemu_st_a32_i64:
707
if (TCG_TARGET_REG_BITS == 64) {
708
tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
709
- } else if (TARGET_LONG_BITS == 32) {
710
+ } else {
711
tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
712
+ }
713
+ break;
714
+ case INDEX_op_qemu_st_a64_i64:
715
+ if (TCG_TARGET_REG_BITS == 64) {
716
+ tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
717
} else {
718
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
719
}
720
break;
721
- case INDEX_op_qemu_st_i128:
722
+ case INDEX_op_qemu_st_a32_i128:
723
+ case INDEX_op_qemu_st_a64_i128:
724
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
725
tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
726
break;
727
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
728
case INDEX_op_clz_i64:
729
return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r);
730
731
- case INDEX_op_qemu_ld_i32:
732
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
733
- ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
734
+ case INDEX_op_qemu_ld_a32_i32:
735
+ return C_O1_I1(r, L);
736
+ case INDEX_op_qemu_ld_a64_i32:
737
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O1_I2(r, L, L);
738
739
- case INDEX_op_qemu_st_i32:
740
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
741
- ? C_O0_I2(L, L) : C_O0_I3(L, L, L));
742
- case INDEX_op_qemu_st8_i32:
743
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
744
- ? C_O0_I2(s, L) : C_O0_I3(s, L, L));
745
+ case INDEX_op_qemu_st_a32_i32:
746
+ return C_O0_I2(L, L);
747
+ case INDEX_op_qemu_st_a64_i32:
748
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
749
+ case INDEX_op_qemu_st8_a32_i32:
750
+ return C_O0_I2(s, L);
751
+ case INDEX_op_qemu_st8_a64_i32:
752
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(s, L) : C_O0_I3(s, L, L);
753
754
- case INDEX_op_qemu_ld_i64:
755
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
756
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, L)
757
- : C_O2_I2(r, r, L, L));
758
+ case INDEX_op_qemu_ld_a32_i64:
759
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L);
760
+ case INDEX_op_qemu_ld_a64_i64:
761
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I2(r, r, L, L);
762
763
- case INDEX_op_qemu_st_i64:
764
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L)
765
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(L, L, L)
766
- : C_O0_I4(L, L, L, L));
767
+ case INDEX_op_qemu_st_a32_i64:
768
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
769
+ case INDEX_op_qemu_st_a64_i64:
770
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L);
771
772
- case INDEX_op_qemu_ld_i128:
773
+ case INDEX_op_qemu_ld_a32_i128:
774
+ case INDEX_op_qemu_ld_a64_i128:
775
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
776
return C_O2_I1(r, r, L);
777
- case INDEX_op_qemu_st_i128:
778
+ case INDEX_op_qemu_st_a32_i128:
779
+ case INDEX_op_qemu_st_a64_i128:
780
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
781
return C_O0_I3(L, L, L);
782
783
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
784
index XXXXXXX..XXXXXXX 100644
785
--- a/tcg/loongarch64/tcg-target.c.inc
786
+++ b/tcg/loongarch64/tcg-target.c.inc
787
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
788
tcg_out_ldst(s, OPC_ST_D, a0, a1, a2);
789
break;
790
791
- case INDEX_op_qemu_ld_i32:
792
+ case INDEX_op_qemu_ld_a32_i32:
793
+ case INDEX_op_qemu_ld_a64_i32:
794
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
795
break;
796
- case INDEX_op_qemu_ld_i64:
797
+ case INDEX_op_qemu_ld_a32_i64:
798
+ case INDEX_op_qemu_ld_a64_i64:
799
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
800
break;
801
- case INDEX_op_qemu_st_i32:
802
+ case INDEX_op_qemu_st_a32_i32:
803
+ case INDEX_op_qemu_st_a64_i32:
804
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
805
break;
806
- case INDEX_op_qemu_st_i64:
807
+ case INDEX_op_qemu_st_a32_i64:
808
+ case INDEX_op_qemu_st_a64_i64:
809
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
810
break;
811
812
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
813
case INDEX_op_st32_i64:
814
case INDEX_op_st_i32:
815
case INDEX_op_st_i64:
816
- case INDEX_op_qemu_st_i32:
817
- case INDEX_op_qemu_st_i64:
818
+ case INDEX_op_qemu_st_a32_i32:
819
+ case INDEX_op_qemu_st_a64_i32:
820
+ case INDEX_op_qemu_st_a32_i64:
821
+ case INDEX_op_qemu_st_a64_i64:
822
return C_O0_I2(rZ, r);
823
824
case INDEX_op_brcond_i32:
825
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
826
case INDEX_op_ld32u_i64:
827
case INDEX_op_ld_i32:
828
case INDEX_op_ld_i64:
829
- case INDEX_op_qemu_ld_i32:
830
- case INDEX_op_qemu_ld_i64:
831
+ case INDEX_op_qemu_ld_a32_i32:
832
+ case INDEX_op_qemu_ld_a64_i32:
833
+ case INDEX_op_qemu_ld_a32_i64:
834
+ case INDEX_op_qemu_ld_a64_i64:
835
return C_O1_I1(r, r);
836
837
case INDEX_op_andc_i32:
838
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
839
index XXXXXXX..XXXXXXX 100644
840
--- a/tcg/mips/tcg-target.c.inc
841
+++ b/tcg/mips/tcg-target.c.inc
842
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
843
tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]);
844
break;
845
846
- case INDEX_op_qemu_ld_i32:
847
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
848
- tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
849
- } else {
850
+ case INDEX_op_qemu_ld_a64_i32:
851
+ if (TCG_TARGET_REG_BITS == 32) {
852
tcg_out_qemu_ld(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
853
+ break;
854
}
855
+ /* fall through */
856
+ case INDEX_op_qemu_ld_a32_i32:
857
+ tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
858
break;
859
- case INDEX_op_qemu_ld_i64:
860
+ case INDEX_op_qemu_ld_a32_i64:
861
if (TCG_TARGET_REG_BITS == 64) {
862
tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
863
- } else if (TARGET_LONG_BITS == 32) {
864
+ } else {
865
tcg_out_qemu_ld(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
866
+ }
867
+ break;
868
+ case INDEX_op_qemu_ld_a64_i64:
869
+ if (TCG_TARGET_REG_BITS == 64) {
870
+ tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
871
} else {
872
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
873
}
874
break;
875
- case INDEX_op_qemu_st_i32:
876
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
877
- tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
878
- } else {
879
+
880
+ case INDEX_op_qemu_st_a64_i32:
881
+ if (TCG_TARGET_REG_BITS == 32) {
882
tcg_out_qemu_st(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
883
+ break;
884
}
885
+ /* fall through */
886
+ case INDEX_op_qemu_st_a32_i32:
887
+ tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
888
break;
889
- case INDEX_op_qemu_st_i64:
890
+ case INDEX_op_qemu_st_a32_i64:
891
if (TCG_TARGET_REG_BITS == 64) {
892
tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
893
- } else if (TARGET_LONG_BITS == 32) {
894
+ } else {
895
tcg_out_qemu_st(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
896
+ }
897
+ break;
898
+ case INDEX_op_qemu_st_a64_i64:
899
+ if (TCG_TARGET_REG_BITS == 64) {
900
+ tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
901
} else {
902
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
903
}
904
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
905
case INDEX_op_brcond2_i32:
906
return C_O0_I4(rZ, rZ, rZ, rZ);
907
908
- case INDEX_op_qemu_ld_i32:
909
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
910
- ? C_O1_I1(r, r) : C_O1_I2(r, r, r));
911
- case INDEX_op_qemu_st_i32:
912
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
913
- ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r));
914
- case INDEX_op_qemu_ld_i64:
915
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
916
- : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
917
- : C_O2_I2(r, r, r, r));
918
- case INDEX_op_qemu_st_i64:
919
+ case INDEX_op_qemu_ld_a32_i32:
920
+ return C_O1_I1(r, r);
921
+ case INDEX_op_qemu_ld_a64_i32:
922
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
923
+ case INDEX_op_qemu_st_a32_i32:
924
+ return C_O0_I2(rZ, r);
925
+ case INDEX_op_qemu_st_a64_i32:
926
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r);
927
+ case INDEX_op_qemu_ld_a32_i64:
928
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
929
+ case INDEX_op_qemu_ld_a64_i64:
930
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
931
+ case INDEX_op_qemu_st_a32_i64:
932
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) : C_O0_I3(rZ, rZ, r);
933
+ case INDEX_op_qemu_st_a64_i64:
934
return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r)
935
- : TARGET_LONG_BITS == 32 ? C_O0_I3(rZ, rZ, r)
936
: C_O0_I4(rZ, rZ, r, r));
937
938
default:
939
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
940
index XXXXXXX..XXXXXXX 100644
941
--- a/tcg/ppc/tcg-target.c.inc
942
+++ b/tcg/ppc/tcg-target.c.inc
943
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
944
tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
945
break;
946
947
- case INDEX_op_qemu_ld_i32:
948
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
949
- tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
950
- args[2], TCG_TYPE_I32);
951
- } else {
952
+ case INDEX_op_qemu_ld_a64_i32:
953
+ if (TCG_TARGET_REG_BITS == 32) {
954
tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
955
args[3], TCG_TYPE_I32);
956
+ break;
957
}
958
+ /* fall through */
959
+ case INDEX_op_qemu_ld_a32_i32:
960
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
961
break;
962
- case INDEX_op_qemu_ld_i64:
963
+ case INDEX_op_qemu_ld_a32_i64:
964
if (TCG_TARGET_REG_BITS == 64) {
965
tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
966
args[2], TCG_TYPE_I64);
967
- } else if (TARGET_LONG_BITS == 32) {
968
+ } else {
969
tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
970
args[3], TCG_TYPE_I64);
971
+ }
972
+ break;
973
+ case INDEX_op_qemu_ld_a64_i64:
974
+ if (TCG_TARGET_REG_BITS == 64) {
975
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
976
+ args[2], TCG_TYPE_I64);
977
} else {
978
tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
979
args[4], TCG_TYPE_I64);
980
}
981
break;
982
- case INDEX_op_qemu_ld_i128:
983
+ case INDEX_op_qemu_ld_a32_i128:
984
+ case INDEX_op_qemu_ld_a64_i128:
985
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
986
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
987
break;
988
989
- case INDEX_op_qemu_st_i32:
990
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
991
- tcg_out_qemu_st(s, args[0], -1, args[1], -1,
992
- args[2], TCG_TYPE_I32);
993
- } else {
994
+ case INDEX_op_qemu_st_a64_i32:
995
+ if (TCG_TARGET_REG_BITS == 32) {
996
tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
997
args[3], TCG_TYPE_I32);
998
+ break;
999
}
1000
+ /* fall through */
1001
+ case INDEX_op_qemu_st_a32_i32:
1002
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
1003
break;
1004
- case INDEX_op_qemu_st_i64:
1005
+ case INDEX_op_qemu_st_a32_i64:
1006
if (TCG_TARGET_REG_BITS == 64) {
1007
tcg_out_qemu_st(s, args[0], -1, args[1], -1,
1008
args[2], TCG_TYPE_I64);
1009
- } else if (TARGET_LONG_BITS == 32) {
1010
+ } else {
1011
tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
1012
args[3], TCG_TYPE_I64);
1013
+ }
1014
+ break;
1015
+ case INDEX_op_qemu_st_a64_i64:
1016
+ if (TCG_TARGET_REG_BITS == 64) {
1017
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1,
1018
+ args[2], TCG_TYPE_I64);
1019
} else {
1020
tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
1021
args[4], TCG_TYPE_I64);
1022
}
1023
break;
1024
- case INDEX_op_qemu_st_i128:
1025
+ case INDEX_op_qemu_st_a32_i128:
1026
+ case INDEX_op_qemu_st_a64_i128:
1027
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1028
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
1029
break;
1030
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1031
case INDEX_op_sub2_i32:
1032
return C_O2_I4(r, r, rI, rZM, r, r);
1033
1034
- case INDEX_op_qemu_ld_i32:
1035
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
1036
- ? C_O1_I1(r, r)
1037
- : C_O1_I2(r, r, r));
1038
-
1039
- case INDEX_op_qemu_st_i32:
1040
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
1041
- ? C_O0_I2(r, r)
1042
- : C_O0_I3(r, r, r));
1043
-
1044
- case INDEX_op_qemu_ld_i64:
1045
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
1046
- : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
1047
- : C_O2_I2(r, r, r, r));
1048
-
1049
- case INDEX_op_qemu_st_i64:
1050
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
1051
- : TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
1052
- : C_O0_I4(r, r, r, r));
1053
-
1054
- case INDEX_op_qemu_ld_i128:
1055
+ case INDEX_op_qemu_ld_a32_i32:
1056
+ return C_O1_I1(r, r);
1057
+ case INDEX_op_qemu_ld_a64_i32:
1058
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
1059
+ case INDEX_op_qemu_ld_a32_i64:
1060
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
1061
+ case INDEX_op_qemu_ld_a64_i64:
1062
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
1063
+ case INDEX_op_qemu_ld_a32_i128:
1064
+ case INDEX_op_qemu_ld_a64_i128:
1065
return C_O2_I1(o, m, r);
1066
- case INDEX_op_qemu_st_i128:
1067
+
1068
+ case INDEX_op_qemu_st_a32_i32:
1069
+ return C_O0_I2(r, r);
1070
+ case INDEX_op_qemu_st_a64_i32:
1071
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1072
+ case INDEX_op_qemu_st_a32_i64:
1073
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1074
+ case INDEX_op_qemu_st_a64_i64:
1075
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
1076
+ case INDEX_op_qemu_st_a32_i128:
1077
+ case INDEX_op_qemu_st_a64_i128:
1078
return C_O0_I3(o, m, r);
1079
1080
case INDEX_op_add_vec:
1081
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
1082
index XXXXXXX..XXXXXXX 100644
1083
--- a/tcg/riscv/tcg-target.c.inc
1084
+++ b/tcg/riscv/tcg-target.c.inc
1085
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1086
tcg_out_setcond(s, args[3], a0, a1, a2);
1087
break;
1088
1089
- case INDEX_op_qemu_ld_i32:
1090
+ case INDEX_op_qemu_ld_a32_i32:
1091
+ case INDEX_op_qemu_ld_a64_i32:
1092
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
1093
break;
1094
- case INDEX_op_qemu_ld_i64:
1095
+ case INDEX_op_qemu_ld_a32_i64:
1096
+ case INDEX_op_qemu_ld_a64_i64:
1097
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
1098
break;
1099
- case INDEX_op_qemu_st_i32:
1100
+ case INDEX_op_qemu_st_a32_i32:
1101
+ case INDEX_op_qemu_st_a64_i32:
1102
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
1103
break;
1104
- case INDEX_op_qemu_st_i64:
1105
+ case INDEX_op_qemu_st_a32_i64:
1106
+ case INDEX_op_qemu_st_a64_i64:
1107
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
1108
break;
1109
1110
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1111
case INDEX_op_sub2_i64:
1112
return C_O2_I4(r, r, rZ, rZ, rM, rM);
1113
1114
- case INDEX_op_qemu_ld_i32:
1115
- case INDEX_op_qemu_ld_i64:
1116
+ case INDEX_op_qemu_ld_a32_i32:
1117
+ case INDEX_op_qemu_ld_a64_i32:
1118
+ case INDEX_op_qemu_ld_a32_i64:
1119
+ case INDEX_op_qemu_ld_a64_i64:
1120
return C_O1_I1(r, r);
1121
- case INDEX_op_qemu_st_i32:
1122
- case INDEX_op_qemu_st_i64:
1123
+ case INDEX_op_qemu_st_a32_i32:
1124
+ case INDEX_op_qemu_st_a64_i32:
1125
+ case INDEX_op_qemu_st_a32_i64:
1126
+ case INDEX_op_qemu_st_a64_i64:
1127
return C_O0_I2(rZ, r);
1128
1129
default:
1130
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
1131
index XXXXXXX..XXXXXXX 100644
1132
--- a/tcg/s390x/tcg-target.c.inc
1133
+++ b/tcg/s390x/tcg-target.c.inc
1134
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1135
args[2], const_args[2], args[3], const_args[3], args[4]);
1136
break;
1137
1138
- case INDEX_op_qemu_ld_i32:
1139
+ case INDEX_op_qemu_ld_a32_i32:
1140
+ case INDEX_op_qemu_ld_a64_i32:
1141
tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32);
1142
break;
1143
- case INDEX_op_qemu_ld_i64:
1144
+ case INDEX_op_qemu_ld_a32_i64:
1145
+ case INDEX_op_qemu_ld_a64_i64:
1146
tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64);
1147
break;
1148
- case INDEX_op_qemu_st_i32:
1149
+ case INDEX_op_qemu_st_a32_i32:
1150
+ case INDEX_op_qemu_st_a64_i32:
1151
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32);
1152
break;
1153
- case INDEX_op_qemu_st_i64:
1154
+ case INDEX_op_qemu_st_a32_i64:
1155
+ case INDEX_op_qemu_st_a64_i64:
1156
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
1157
break;
1158
- case INDEX_op_qemu_ld_i128:
1159
+ case INDEX_op_qemu_ld_a32_i128:
1160
+ case INDEX_op_qemu_ld_a64_i128:
1161
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
1162
break;
1163
- case INDEX_op_qemu_st_i128:
1164
+ case INDEX_op_qemu_st_a32_i128:
1165
+ case INDEX_op_qemu_st_a64_i128:
1166
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
1167
break;
1168
1169
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1170
case INDEX_op_ctpop_i64:
1171
return C_O1_I1(r, r);
1172
1173
- case INDEX_op_qemu_ld_i32:
1174
- case INDEX_op_qemu_ld_i64:
1175
+ case INDEX_op_qemu_ld_a32_i32:
1176
+ case INDEX_op_qemu_ld_a64_i32:
1177
+ case INDEX_op_qemu_ld_a32_i64:
1178
+ case INDEX_op_qemu_ld_a64_i64:
1179
return C_O1_I1(r, r);
1180
- case INDEX_op_qemu_st_i64:
1181
- case INDEX_op_qemu_st_i32:
1182
+ case INDEX_op_qemu_st_a32_i64:
1183
+ case INDEX_op_qemu_st_a64_i64:
1184
+ case INDEX_op_qemu_st_a32_i32:
1185
+ case INDEX_op_qemu_st_a64_i32:
1186
return C_O0_I2(r, r);
1187
- case INDEX_op_qemu_ld_i128:
1188
+ case INDEX_op_qemu_ld_a32_i128:
1189
+ case INDEX_op_qemu_ld_a64_i128:
1190
return C_O2_I1(o, m, r);
1191
- case INDEX_op_qemu_st_i128:
1192
+ case INDEX_op_qemu_st_a32_i128:
1193
+ case INDEX_op_qemu_st_a64_i128:
1194
return C_O0_I3(o, m, r);
1195
1196
case INDEX_op_deposit_i32:
1197
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
1198
index XXXXXXX..XXXXXXX 100644
1199
--- a/tcg/sparc64/tcg-target.c.inc
1200
+++ b/tcg/sparc64/tcg-target.c.inc
1201
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1202
tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
1203
break;
1204
1205
- case INDEX_op_qemu_ld_i32:
1206
+ case INDEX_op_qemu_ld_a32_i32:
1207
+ case INDEX_op_qemu_ld_a64_i32:
1208
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
1209
break;
1210
- case INDEX_op_qemu_ld_i64:
1211
+ case INDEX_op_qemu_ld_a32_i64:
1212
+ case INDEX_op_qemu_ld_a64_i64:
1213
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
1214
break;
1215
- case INDEX_op_qemu_st_i32:
1216
+ case INDEX_op_qemu_st_a32_i32:
1217
+ case INDEX_op_qemu_st_a64_i32:
1218
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
1219
break;
1220
- case INDEX_op_qemu_st_i64:
1221
+ case INDEX_op_qemu_st_a32_i64:
1222
+ case INDEX_op_qemu_st_a64_i64:
1223
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
1224
break;
1225
1226
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1227
case INDEX_op_extu_i32_i64:
1228
case INDEX_op_extrl_i64_i32:
1229
case INDEX_op_extrh_i64_i32:
1230
- case INDEX_op_qemu_ld_i32:
1231
- case INDEX_op_qemu_ld_i64:
1232
+ case INDEX_op_qemu_ld_a32_i32:
1233
+ case INDEX_op_qemu_ld_a64_i32:
1234
+ case INDEX_op_qemu_ld_a32_i64:
1235
+ case INDEX_op_qemu_ld_a64_i64:
1236
return C_O1_I1(r, r);
1237
1238
case INDEX_op_st8_i32:
1239
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1240
case INDEX_op_st_i32:
1241
case INDEX_op_st32_i64:
1242
case INDEX_op_st_i64:
1243
- case INDEX_op_qemu_st_i32:
1244
- case INDEX_op_qemu_st_i64:
1245
+ case INDEX_op_qemu_st_a32_i32:
1246
+ case INDEX_op_qemu_st_a64_i32:
1247
+ case INDEX_op_qemu_st_a32_i64:
1248
+ case INDEX_op_qemu_st_a64_i64:
1249
return C_O0_I2(rZ, r);
1250
1251
case INDEX_op_add_i32:
1252
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
1253
index XXXXXXX..XXXXXXX 100644
1254
--- a/tcg/tci/tcg-target.c.inc
1255
+++ b/tcg/tci/tcg-target.c.inc
1256
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1257
case INDEX_op_setcond2_i32:
1258
return C_O1_I4(r, r, r, r, r);
1259
1260
- case INDEX_op_qemu_ld_i32:
1261
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
1262
- ? C_O1_I1(r, r)
1263
- : C_O1_I2(r, r, r));
1264
- case INDEX_op_qemu_ld_i64:
1265
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
1266
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, r)
1267
- : C_O2_I2(r, r, r, r));
1268
- case INDEX_op_qemu_st_i32:
1269
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
1270
- ? C_O0_I2(r, r)
1271
- : C_O0_I3(r, r, r));
1272
- case INDEX_op_qemu_st_i64:
1273
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
1274
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(r, r, r)
1275
- : C_O0_I4(r, r, r, r));
1276
+ case INDEX_op_qemu_ld_a32_i32:
1277
+ return C_O1_I1(r, r);
1278
+ case INDEX_op_qemu_ld_a64_i32:
1279
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
1280
+ case INDEX_op_qemu_ld_a32_i64:
1281
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
1282
+ case INDEX_op_qemu_ld_a64_i64:
1283
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
1284
+ case INDEX_op_qemu_st_a32_i32:
1285
+ return C_O0_I2(r, r);
1286
+ case INDEX_op_qemu_st_a64_i32:
1287
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1288
+ case INDEX_op_qemu_st_a32_i64:
1289
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1290
+ case INDEX_op_qemu_st_a64_i64:
1291
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
1292
1293
default:
1294
g_assert_not_reached();
1295
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1296
tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]);
1297
break;
1298
1299
- case INDEX_op_qemu_ld_i32:
1300
- case INDEX_op_qemu_st_i32:
1301
+ case INDEX_op_qemu_ld_a32_i32:
1302
+ case INDEX_op_qemu_ld_a64_i32:
1303
+ case INDEX_op_qemu_st_a32_i32:
1304
+ case INDEX_op_qemu_st_a64_i32:
1305
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
1306
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
1307
} else {
1308
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1309
}
1310
break;
1311
1312
- case INDEX_op_qemu_ld_i64:
1313
- case INDEX_op_qemu_st_i64:
1314
+ case INDEX_op_qemu_ld_a32_i64:
1315
+ case INDEX_op_qemu_ld_a64_i64:
1316
+ case INDEX_op_qemu_st_a32_i64:
1317
+ case INDEX_op_qemu_st_a64_i64:
1318
if (TCG_TARGET_REG_BITS == 64) {
1319
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
1320
} else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
1321
--
1322
2.34.1
1323
1324
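The backend constraint changes above all hinge on the translator choosing between the _a32_ and _a64_ opcode variants up front, so the per-target code can switch on the opcode alone. A minimal standalone sketch of that selection, assuming the guest address width is still given by TARGET_LONG_BITS at translation time; the enum and the helper name here are illustrative stand-ins, not the series' actual code:

    /* Standalone illustration; the real enum values come from tcg/tcg-opc.h
     * and the real selection is done by the tcg-op front end, not by this
     * hypothetical helper. */
    typedef enum {
        INDEX_op_qemu_ld_a32_i32,
        INDEX_op_qemu_ld_a64_i32,
        INDEX_op_qemu_ld_a32_i64,
        INDEX_op_qemu_ld_a64_i64,
    } QemuLdOpc;

    /* Pick the address-width variant once, from the guest address size, so
     * that backend constraint code can dispatch on the opcode alone. */
    static QemuLdOpc select_qemu_ld_opc(int target_long_bits, int value_bits)
    {
        if (value_bits == 32) {
            return target_long_bits == 64 ? INDEX_op_qemu_ld_a64_i32
                                          : INDEX_op_qemu_ld_a32_i32;
        }
        return target_long_bits == 64 ? INDEX_op_qemu_ld_a64_i64
                                      : INDEX_op_qemu_ld_a32_i64;
    }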
1
Forgetting this asserts when tcg_gen_cmp_vec is called from
1
We now have the address size as part of the opcode, so
2
within tcg_gen_cmpsel_vec.
2
we no longer need to test TARGET_LONG_BITS. We can use
3
uint64_t for target_ulong, as passed into load/store helpers.
3
4
4
Fixes: 72b4c792c7a
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
tcg/tcg-op-vec.c | 2 ++
8
tcg/tci.c | 61 +++++++++++++++++++++++++---------------
8
1 file changed, 2 insertions(+)
9
tcg/tci/tcg-target.c.inc | 15 +++++-----
10
2 files changed, 46 insertions(+), 30 deletions(-)
9
11
10
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
12
diff --git a/tcg/tci.c b/tcg/tci.c
11
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg-op-vec.c
14
--- a/tcg/tci.c
13
+++ b/tcg/tcg-op-vec.c
15
+++ b/tcg/tci.c
14
@@ -XXX,XX +XXX,XX @@ static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
16
@@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
15
TCGv_vec b, TCGOpcode opc, TCGCond cond)
17
return result;
18
}
19
20
-static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
21
+static uint64_t tci_qemu_ld(CPUArchState *env, uint64_t taddr,
22
MemOpIdx oi, const void *tb_ptr)
16
{
23
{
17
if (!do_op3(vece, r, a, b, opc)) {
24
MemOp mop = get_memop(oi);
18
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
25
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
19
tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
20
+ tcg_swap_vecop_list(hold_list);
21
}
26
}
22
}
27
}
23
28
29
-static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
30
+static void tci_qemu_st(CPUArchState *env, uint64_t taddr, uint64_t val,
31
MemOpIdx oi, const void *tb_ptr)
32
{
33
MemOp mop = get_memop(oi);
34
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
35
TCGReg r0, r1, r2, r3, r4, r5;
36
tcg_target_ulong t1;
37
TCGCond condition;
38
- target_ulong taddr;
39
uint8_t pos, len;
40
uint32_t tmp32;
41
- uint64_t tmp64;
42
+ uint64_t tmp64, taddr;
43
uint64_t T1, T2;
44
MemOpIdx oi;
45
int32_t ofs;
46
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
47
break;
48
49
case INDEX_op_qemu_ld_a32_i32:
50
+ tci_args_rrm(insn, &r0, &r1, &oi);
51
+ taddr = (uint32_t)regs[r1];
52
+ goto do_ld_i32;
53
case INDEX_op_qemu_ld_a64_i32:
54
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
55
+ if (TCG_TARGET_REG_BITS == 64) {
56
tci_args_rrm(insn, &r0, &r1, &oi);
57
taddr = regs[r1];
58
} else {
59
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
60
taddr = tci_uint64(regs[r2], regs[r1]);
61
}
62
- tmp32 = tci_qemu_ld(env, taddr, oi, tb_ptr);
63
- regs[r0] = tmp32;
64
+ do_ld_i32:
65
+ regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr);
66
break;
67
68
case INDEX_op_qemu_ld_a32_i64:
69
+ if (TCG_TARGET_REG_BITS == 64) {
70
+ tci_args_rrm(insn, &r0, &r1, &oi);
71
+ taddr = (uint32_t)regs[r1];
72
+ } else {
73
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
74
+ taddr = (uint32_t)regs[r2];
75
+ }
76
+ goto do_ld_i64;
77
case INDEX_op_qemu_ld_a64_i64:
78
if (TCG_TARGET_REG_BITS == 64) {
79
tci_args_rrm(insn, &r0, &r1, &oi);
80
taddr = regs[r1];
81
- } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
82
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
83
- taddr = regs[r2];
84
} else {
85
tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
86
taddr = tci_uint64(regs[r3], regs[r2]);
87
oi = regs[r4];
88
}
89
+ do_ld_i64:
90
tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr);
91
if (TCG_TARGET_REG_BITS == 32) {
92
tci_write_reg64(regs, r1, r0, tmp64);
93
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
94
break;
95
96
case INDEX_op_qemu_st_a32_i32:
97
+ tci_args_rrm(insn, &r0, &r1, &oi);
98
+ taddr = (uint32_t)regs[r1];
99
+ goto do_st_i32;
100
case INDEX_op_qemu_st_a64_i32:
101
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
102
+ if (TCG_TARGET_REG_BITS == 64) {
103
tci_args_rrm(insn, &r0, &r1, &oi);
104
taddr = regs[r1];
105
} else {
106
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
107
taddr = tci_uint64(regs[r2], regs[r1]);
108
}
109
- tmp32 = regs[r0];
110
- tci_qemu_st(env, taddr, tmp32, oi, tb_ptr);
111
+ do_st_i32:
112
+ tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr);
113
break;
114
115
case INDEX_op_qemu_st_a32_i64:
116
+ if (TCG_TARGET_REG_BITS == 64) {
117
+ tci_args_rrm(insn, &r0, &r1, &oi);
118
+ tmp64 = regs[r0];
119
+ taddr = (uint32_t)regs[r1];
120
+ } else {
121
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
122
+ tmp64 = tci_uint64(regs[r1], regs[r0]);
123
+ taddr = (uint32_t)regs[r2];
124
+ }
125
+ goto do_st_i64;
126
case INDEX_op_qemu_st_a64_i64:
127
if (TCG_TARGET_REG_BITS == 64) {
128
tci_args_rrm(insn, &r0, &r1, &oi);
129
- taddr = regs[r1];
130
tmp64 = regs[r0];
131
+ taddr = regs[r1];
132
} else {
133
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
134
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
135
- taddr = regs[r2];
136
- } else {
137
- tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
138
- taddr = tci_uint64(regs[r3], regs[r2]);
139
- oi = regs[r4];
140
- }
141
+ tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
142
tmp64 = tci_uint64(regs[r1], regs[r0]);
143
+ taddr = tci_uint64(regs[r3], regs[r2]);
144
+ oi = regs[r4];
145
}
146
+ do_st_i64:
147
tci_qemu_st(env, taddr, tmp64, oi, tb_ptr);
148
break;
149
150
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
151
index XXXXXXX..XXXXXXX 100644
152
--- a/tcg/tci/tcg-target.c.inc
153
+++ b/tcg/tci/tcg-target.c.inc
154
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
155
return false;
156
}
157
158
-static void stack_bounds_check(TCGReg base, target_long offset)
159
+static void stack_bounds_check(TCGReg base, intptr_t offset)
160
{
161
if (base == TCG_REG_CALL_STACK) {
162
tcg_debug_assert(offset >= 0);
163
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
164
break;
165
166
case INDEX_op_qemu_ld_a32_i32:
167
- case INDEX_op_qemu_ld_a64_i32:
168
case INDEX_op_qemu_st_a32_i32:
169
+ tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
170
+ break;
171
+ case INDEX_op_qemu_ld_a64_i32:
172
case INDEX_op_qemu_st_a64_i32:
173
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
174
+ case INDEX_op_qemu_ld_a32_i64:
175
+ case INDEX_op_qemu_st_a32_i64:
176
+ if (TCG_TARGET_REG_BITS == 64) {
177
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
178
} else {
179
tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
180
}
181
break;
182
-
183
- case INDEX_op_qemu_ld_a32_i64:
184
case INDEX_op_qemu_ld_a64_i64:
185
- case INDEX_op_qemu_st_a32_i64:
186
case INDEX_op_qemu_st_a64_i64:
187
if (TCG_TARGET_REG_BITS == 64) {
188
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
189
- } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
190
- tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
191
} else {
192
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]);
193
tcg_out_op_rrrrr(s, opc, args[0], args[1],
24
--
194
--
25
2.25.1
195
2.34.1
26
196
27
197
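The interpreter hunks above call tci_uint64() to assemble a 64-bit value (either data or a guest address) from two 32-bit registers on a 32-bit host, but its definition is not part of this excerpt. A hedged sketch of what that helper presumably looks like, high half first:

    #include <stdint.h>

    /* Sketch only: the real definition lives elsewhere in tcg/tci.c.
     * Glue two 32-bit register halves into one 64-bit value. */
    static uint64_t tci_uint64(uint32_t high, uint32_t low)
    {
        return ((uint64_t)high << 32) | low;
    }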
New patch
1
Keep all 32-bit values zero extended in the register, not solely when
2
addresses are 32 bits. This eliminates a dependency on TARGET_LONG_BITS.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/i386/tcg-target.h | 6 +++---
8
1 file changed, 3 insertions(+), 3 deletions(-)
9
10
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/i386/tcg-target.h
13
+++ b/tcg/i386/tcg-target.h
14
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
15
#define TCG_TARGET_HAS_mulsh_i32 0
16
17
#if TCG_TARGET_REG_BITS == 64
18
-/* Keep target addresses zero-extended in a register. */
19
-#define TCG_TARGET_HAS_extrl_i64_i32 (TARGET_LONG_BITS == 32)
20
-#define TCG_TARGET_HAS_extrh_i64_i32 (TARGET_LONG_BITS == 32)
21
+/* Keep 32-bit values zero-extended in a register. */
22
+#define TCG_TARGET_HAS_extrl_i64_i32 1
23
+#define TCG_TARGET_HAS_extrh_i64_i32 1
24
#define TCG_TARGET_HAS_div2_i64 1
25
#define TCG_TARGET_HAS_rot_i64 1
26
#define TCG_TARGET_HAS_ext8s_i64 1
27
--
28
2.34.1
29
30
New patch
1
Since TCG_TYPE_I32 values are kept zero-extended in registers, via
2
omission of the REXW bit, we need not extend if the register matches.
3
This is already relied upon by qemu_{ld,st}.
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/i386/tcg-target.c.inc | 4 +++-
10
1 file changed, 3 insertions(+), 1 deletion(-)
11
12
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/i386/tcg-target.c.inc
15
+++ b/tcg/i386/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
17
18
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
19
{
20
- tcg_out_ext32u(s, dest, src);
21
+ if (dest != src) {
22
+ tcg_out_ext32u(s, dest, src);
23
+ }
24
}
25
26
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src)
27
--
28
2.34.1
29
30
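Both of the two preceding i386 patches lean on the same host property. A standalone illustration (not QEMU code) of the invariant: any 32-bit register write on x86-64 clears bits 63:32, so a value already kept in this canonical zero-extended form needs no extra extension when source and destination coincide:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t reg = 0xdeadbeefcafef00dULL;

        /* What a 32-bit mov does to a 64-bit x86-64 register: upper half cleared. */
        reg = (uint32_t)reg;
        assert(reg == 0xcafef00dULL);
        return 0;
    }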
New patch
1
Because of its use in tgen_arithi, this value must be a signed
2
32-bit quantity, as that is what may be encoded in the insn.
3
The truncation of the value to unsigned for 32-bit guests is
4
done via the REX.W bit selection in 'trexw'.
1
5
6
Removes the only uses of target_ulong from this tcg backend.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
tcg/i386/tcg-target.c.inc | 4 ++--
12
1 file changed, 2 insertions(+), 2 deletions(-)
13
14
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/i386/tcg-target.c.inc
17
+++ b/tcg/i386/tcg-target.c.inc
18
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
19
int trexw = 0, hrexw = 0, tlbrexw = 0;
20
unsigned mem_index = get_mmuidx(oi);
21
unsigned s_mask = (1 << s_bits) - 1;
22
- target_ulong tlb_mask;
23
+ int tlb_mask;
24
25
ldst = new_ldst_label(s);
26
ldst->is_ld = is_ld;
27
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
28
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
29
addrlo, s_mask - a_mask);
30
}
31
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
32
+ tlb_mask = TARGET_PAGE_MASK | a_mask;
33
tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
34
35
/* cmp 0(TCG_REG_L0), TCG_REG_L1 */
36
--
37
2.34.1
38
39
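A short standalone check (illustrative values only, assuming 4 KiB pages) of why the mask always fits the signed 32-bit immediate that tgen_arithi encodes, which sign-extends under REX.W:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        int32_t page_mask = -4096;              /* TARGET_PAGE_MASK for 12-bit pages */
        int32_t a_mask = 7;                     /* example alignment mask */
        int32_t tlb_mask = page_mask | a_mask;  /* 0xfffff007 as a signed value */

        assert(tlb_mask == (int32_t)0xfffff007);
        return 0;
    }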
New patch
1
All uses can be inferred from the INDEX_op_qemu_*_a{32,64}_* opcode
2
being used. Add a field into TCGLabelQemuLdst to record the usage.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/i386/tcg-target.c.inc | 8 +++-----
8
1 file changed, 3 insertions(+), 5 deletions(-)
9
10
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/i386/tcg-target.c.inc
13
+++ b/tcg/i386/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
15
ldst->addrhi_reg = addrhi;
16
17
if (TCG_TARGET_REG_BITS == 64) {
18
- if (TARGET_LONG_BITS == 64) {
19
- ttype = TCG_TYPE_I64;
20
- trexw = P_REXW;
21
- }
22
+ ttype = s->addr_type;
23
+ trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
24
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
25
hrexw = P_REXW;
26
if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
27
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
28
ldst->label_ptr[0] = s->code_ptr;
29
s->code_ptr += 4;
30
31
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
32
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) {
33
/* cmp 4(TCG_REG_L0), addrhi */
34
tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
35
36
--
37
2.34.1
38
39
New patch
1
All uses can be inferred from the INDEX_op_qemu_*_a{32,64}_*
2
opcode being used.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/arm/tcg-target.c.inc | 14 +++++++-------
8
1 file changed, 7 insertions(+), 7 deletions(-)
9
10
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/arm/tcg-target.c.inc
13
+++ b/tcg/arm/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
15
* Load the tlb comparator into R2/R3 and the fast path addend into R1.
16
*/
17
if (cmp_off == 0) {
18
- if (TARGET_LONG_BITS == 64) {
19
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
20
- } else {
21
+ if (s->addr_type == TCG_TYPE_I32) {
22
tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
23
+ } else {
24
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
25
}
26
} else {
27
tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
28
TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
29
- if (TARGET_LONG_BITS == 64) {
30
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
31
- } else {
32
+ if (s->addr_type == TCG_TYPE_I32) {
33
tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
34
+ } else {
35
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
36
}
37
}
38
39
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
40
SHIFT_IMM_LSL(TARGET_PAGE_BITS));
41
}
42
43
- if (TARGET_LONG_BITS == 64) {
44
+ if (s->addr_type != TCG_TYPE_I32) {
45
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
46
}
47
#else
48
--
49
2.34.1
50
51
New patch
1
Eliminate the test vs TARGET_LONG_BITS by considering this
2
predicate to be always true, and simplify accordingly.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/aarch64/tcg-target.c.inc | 19 +++++++++----------
8
1 file changed, 9 insertions(+), 10 deletions(-)
9
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/aarch64/tcg-target.c.inc
13
+++ b/tcg/aarch64/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ bool have_lse2;
15
#define TCG_VEC_TMP0 TCG_REG_V31
16
17
#ifndef CONFIG_SOFTMMU
18
-/* Note that XZR cannot be encoded in the address base register slot,
19
- as that actaully encodes SP. So if we need to zero-extend the guest
20
- address, via the address index register slot, we need to load even
21
- a zero guest base into a register. */
22
-#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
23
#define TCG_REG_GUEST_BASE TCG_REG_X28
24
#endif
25
26
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
27
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
28
}
29
30
- if (USE_GUEST_BASE) {
31
+ if (guest_base || addr_type == TCG_TYPE_I32) {
32
h->base = TCG_REG_GUEST_BASE;
33
h->index = addr_reg;
34
h->index_ext = addr_type;
35
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
36
CPU_TEMP_BUF_NLONGS * sizeof(long));
37
38
#if !defined(CONFIG_SOFTMMU)
39
- if (USE_GUEST_BASE) {
40
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
41
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
42
- }
43
+ /*
44
+ * Note that XZR cannot be encoded in the address base register slot,
45
+ * as that actaully encodes SP. Depending on the guest, we may need
46
+ * to zero-extend the guest address via the address index register slot,
47
+ * therefore we need to load even a zero guest base into a register.
48
+ */
49
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
50
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
51
#endif
52
53
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
54
--
55
2.34.1
56
57
New patch
1
All uses replaced with TCGContext.addr_type.
1
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/aarch64/tcg-target.c.inc | 11 +++++------
7
1 file changed, 5 insertions(+), 6 deletions(-)
8
9
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/aarch64/tcg-target.c.inc
12
+++ b/tcg/aarch64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
14
TCGReg addr_reg, MemOpIdx oi,
15
bool is_ld)
16
{
17
- TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
18
+ TCGType addr_type = s->addr_type;
19
TCGLabelQemuLdst *ldst = NULL;
20
MemOp opc = get_memop(oi);
21
MemOp s_bits = opc & MO_SIZE;
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
23
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
24
25
/* Load the tlb comparator into X0, and the fast path addend into X1. */
26
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
27
+ tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
28
is_ld ? offsetof(CPUTLBEntry, addr_read)
29
: offsetof(CPUTLBEntry, addr_write));
30
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
31
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
32
if (a_mask >= s_mask) {
33
x3 = addr_reg;
34
} else {
35
- tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
36
+ tcg_out_insn(s, 3401, ADDI, addr_type,
37
TCG_REG_X3, addr_reg, s_mask - a_mask);
38
x3 = TCG_REG_X3;
39
}
40
compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
41
42
/* Store the page mask part of the address into X3. */
43
- tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
44
- TCG_REG_X3, x3, compare_mask);
45
+ tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
46
47
/* Perform the address comparison. */
48
- tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
49
+ tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);
50
51
/* If not equal, we jump to the slow path. */
52
ldst->label_ptr[0] = s->code_ptr;
53
--
54
2.34.1
55
56
New patch
1
All uses replaced with TCGContext.addr_type.
1
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/loongarch64/tcg-target.c.inc | 9 +++++----
7
1 file changed, 5 insertions(+), 4 deletions(-)
8
9
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/loongarch64/tcg-target.c.inc
12
+++ b/tcg/loongarch64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
14
TCGReg addr_reg, MemOpIdx oi,
15
bool is_ld)
16
{
17
+ TCGType addr_type = s->addr_type;
18
TCGLabelQemuLdst *ldst = NULL;
19
MemOp opc = get_memop(oi);
20
MemOp a_bits;
21
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
22
tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
23
24
/* Load the tlb comparator and the addend. */
25
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
26
+ tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
27
is_ld ? offsetof(CPUTLBEntry, addr_read)
28
: offsetof(CPUTLBEntry, addr_write));
29
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
30
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
31
if (a_bits < s_bits) {
32
unsigned a_mask = (1u << a_bits) - 1;
33
unsigned s_mask = (1u << s_bits) - 1;
34
- tcg_out_addi(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
35
+ tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
36
} else {
37
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg);
38
+ tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
39
}
40
tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
41
a_bits, TARGET_PAGE_BITS - 1);
42
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
43
h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
44
#endif
45
46
- if (TARGET_LONG_BITS == 32) {
47
+ if (addr_type == TCG_TYPE_I32) {
48
h->base = TCG_REG_TMP0;
49
tcg_out_ext32u(s, h->base, addr_reg);
50
} else {
51
--
52
2.34.1
53
54
1
When single-stepping with a debugger attached to QEMU, and when an
1
All uses replaced with TCGContext.addr_type.
2
interrupt is raised, the debugger misses the first instruction after
3
the interrupt.
4
2
5
Tested-by: Luc Michel <luc.michel@greensocs.com>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luc Michel <luc.michel@greensocs.com>
7
Buglink: https://bugs.launchpad.net/qemu/+bug/757702
8
Message-Id: <20200717163029.2737546-1-richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
5
---
11
accel/tcg/cpu-exec.c | 8 +++++++-
6
tcg/mips/tcg-target.c.inc | 42 +++++++++++++++++++++------------------
12
1 file changed, 7 insertions(+), 1 deletion(-)
7
1 file changed, 23 insertions(+), 19 deletions(-)
13
8
14
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
9
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
15
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
16
--- a/accel/tcg/cpu-exec.c
11
--- a/tcg/mips/tcg-target.c.inc
17
+++ b/accel/tcg/cpu-exec.c
12
+++ b/tcg/mips/tcg-target.c.inc
18
@@ -XXX,XX +XXX,XX @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
13
@@ -XXX,XX +XXX,XX @@ typedef enum {
19
else {
14
/* Aliases for convenience. */
20
if (cc->cpu_exec_interrupt(cpu, interrupt_request)) {
15
ALIAS_PADD = sizeof(void *) == 4 ? OPC_ADDU : OPC_DADDU,
21
replay_interrupt();
16
ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
22
- cpu->exception_index = -1;
17
- ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
23
+ /*
18
- ? OPC_SRL : OPC_DSRL,
24
+ * After processing the interrupt, ensure an EXCP_DEBUG is
19
- ALIAS_TADDI = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
25
+ * raised when single-stepping so that GDB doesn't miss the
20
- ? OPC_ADDIU : OPC_DADDIU,
26
+ * next instruction.
21
} MIPSInsn;
27
+ */
22
28
+ cpu->exception_index =
23
/*
29
+ (cpu->singlestep_enabled ? EXCP_DEBUG : -1);
24
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
30
*last_tb = NULL;
25
TCGReg addrlo, TCGReg addrhi,
31
}
26
MemOpIdx oi, bool is_ld)
32
/* The target hook may have updated the 'cpu->interrupt_request';
27
{
28
+ TCGType addr_type = s->addr_type;
29
TCGLabelQemuLdst *ldst = NULL;
30
MemOp opc = get_memop(oi);
31
MemOp a_bits;
32
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
33
tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
34
35
/* Extract the TLB index from the address into TMP3. */
36
- tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrlo,
37
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
38
+ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
39
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
40
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
41
+ } else {
42
+ tcg_out_dsrl(s, TCG_TMP3, addrlo,
43
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
44
+ }
45
tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
46
47
/* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
48
tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
49
50
- /* Load the (low-half) tlb comparator. */
51
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
52
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
53
- } else {
54
- tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_TMP3, cmp_off);
55
- }
56
-
57
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
58
+ if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
59
+ /* Load the tlb comparator. */
60
+ tcg_out_ld(s, addr_type, TCG_TMP0, TCG_TMP3, cmp_off);
61
/* Load the tlb addend for the fast path. */
62
tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
63
+ } else {
64
+ /* Load the low half of the tlb comparator. */
65
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
66
}
67
68
/*
69
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
70
* For unaligned accesses, compare against the end of the access to
71
* verify that it does not cross a page boundary.
72
*/
73
- tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
74
+ tcg_out_movi(s, addr_type, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
75
if (a_mask < s_mask) {
76
- tcg_out_opc_imm(s, ALIAS_TADDI, TCG_TMP2, addrlo, s_mask - a_mask);
77
+ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
78
+ tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
79
+ } else {
80
+ tcg_out_opc_imm(s, OPC_DADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
81
+ }
82
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
83
} else {
84
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
85
}
86
87
/* Zero extend a 32-bit guest address for a 64-bit host. */
88
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
89
+ if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
90
tcg_out_ext32u(s, TCG_TMP2, addrlo);
91
addrlo = TCG_TMP2;
92
}
93
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
94
tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
95
96
/* Load and test the high half tlb comparator. */
97
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
98
+ if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
99
/* delay slot */
100
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
101
102
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
103
}
104
105
base = addrlo;
106
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
107
+ if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
108
tcg_out_ext32u(s, TCG_REG_A0, base);
109
base = TCG_REG_A0;
110
}
33
--
111
--
34
2.25.1
112
2.34.1
35
113
36
114
1
From: Luc Michel <luc.michel@greensocs.com>
1
All uses replaced with TCGContext.addr_type.
2
2
3
When single-stepping with a debugger attached to QEMU, and when an
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
exception is raised, the debugger misses the first instruction after the
5
exception:
6
7
$ qemu-system-aarch64 -M virt -display none -cpu cortex-a53 -s -S
8
9
$ aarch64-linux-gnu-gdb
10
GNU gdb (GDB) 9.2
11
[...]
12
(gdb) tar rem :1234
13
Remote debugging using :1234
14
warning: No executable has been specified and target does not support
15
determining executable automatically. Try using the "file" command.
16
0x0000000000000000 in ?? ()
17
(gdb) # writing nop insns to 0x200 and 0x204
18
(gdb) set *0x200 = 0xd503201f
19
(gdb) set *0x204 = 0xd503201f
20
(gdb) # 0x0 address contains 0 which is an invalid opcode.
21
(gdb) # The CPU should raise an exception and jump to 0x200
22
(gdb) si
23
0x0000000000000204 in ?? ()
24
25
With this commit, the same run steps correctly on the first instruction
26
of the exception vector:
27
28
(gdb) si
29
0x0000000000000200 in ?? ()
30
31
Buglink: https://bugs.launchpad.net/qemu/+bug/757702
32
Signed-off-by: Luc Michel <luc.michel@greensocs.com>
33
Message-Id: <20200716193947.3058389-1-luc.michel@greensocs.com>
34
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
35
---
5
---
36
accel/tcg/cpu-exec.c | 11 +++++++++++
6
tcg/tcg.c | 27 ++++++++++++++-------------
37
1 file changed, 11 insertions(+)
7
1 file changed, 14 insertions(+), 13 deletions(-)
38
8
39
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
9
diff --git a/tcg/tcg.c b/tcg/tcg.c
40
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
41
--- a/accel/tcg/cpu-exec.c
11
--- a/tcg/tcg.c
42
+++ b/accel/tcg/cpu-exec.c
12
+++ b/tcg/tcg.c
43
@@ -XXX,XX +XXX,XX @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
44
cc->do_interrupt(cpu);
14
next_arg = 1;
45
qemu_mutex_unlock_iothread();
15
46
cpu->exception_index = -1;
16
loc = &info->in[next_arg];
47
+
17
- if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
48
+ if (unlikely(cpu->singlestep_enabled)) {
18
- nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
49
+ /*
19
- ldst->addrlo_reg, ldst->addrhi_reg);
50
+ * After processing the exception, ensure an EXCP_DEBUG is
20
- tcg_out_helper_load_slots(s, nmov, mov, parm);
51
+ * raised when single-stepping so that GDB doesn't miss the
21
- next_arg += nmov;
52
+ * next instruction.
22
- } else {
53
+ */
23
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
54
+ *ret = EXCP_DEBUG;
24
/*
55
+ cpu_handle_debug_exception(cpu);
25
* 32-bit host with 32-bit guest: zero-extend the guest address
56
+ return true;
26
* to 64-bits for the helper by storing the low part, then
57
+ }
27
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
58
} else if (!replay_has_interrupt()) {
28
tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
59
/* give a chance to iothread in replay mode */
29
TCG_TYPE_I32, 0, parm);
60
*ret = EXCP_INTERRUPT;
30
next_arg += 2;
31
+ } else {
32
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
33
+ ldst->addrlo_reg, ldst->addrhi_reg);
34
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
35
+ next_arg += nmov;
36
}
37
38
switch (info->out_kind) {
39
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
40
41
/* Handle addr argument. */
42
loc = &info->in[next_arg];
43
- if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
44
- n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
45
- ldst->addrlo_reg, ldst->addrhi_reg);
46
- next_arg += n;
47
- nmov += n;
48
- } else {
49
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
50
/*
51
* 32-bit host with 32-bit guest: zero-extend the guest address
52
* to 64-bits for the helper by storing the low part. Later,
53
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
54
ldst->addrlo_reg, -1);
55
next_arg += 2;
56
nmov += 1;
57
+ } else {
58
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
59
+ ldst->addrlo_reg, ldst->addrhi_reg);
60
+ next_arg += n;
61
+ nmov += n;
62
}
63
64
/* Handle data argument. */
65
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
66
g_assert_not_reached();
67
}
68
69
- if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32) {
70
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
71
+ /* Zero extend the address by loading a zero for the high part. */
72
loc = &info->in[1 + !HOST_BIG_ENDIAN];
73
tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
74
}
61
--
75
--
62
2.25.1
76
2.34.1
63
77
64
78
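The 32-bit-host path above places a zero in the slot that forms the high half of the 64-bit address argument, selecting it with !HOST_BIG_ENDIAN. A small standalone illustration (not QEMU code) of why the slot index depends on host byte order when a 64-bit helper argument is assembled from two 32-bit slots:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Determine which 32-bit slot holds the low half of a 64-bit value. */
        union { uint64_t whole; uint32_t half[2]; } probe = { .whole = 1 };
        int low_index = (probe.half[0] == 1) ? 0 : 1;

        /* Build a 64-bit "address" argument: low half = address, high half = 0. */
        union { uint64_t whole; uint32_t half[2]; } arg = { .whole = 0 };
        arg.half[low_index] = 0x1234;

        printf("0x%llx\n", (unsigned long long)arg.whole);   /* prints 0x1234 */
        return 0;
    }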
New patch
1
Disconnect guest page size from TCG compilation.
2
While this could be done via exec/target_page.h, we want to cache
3
the value across multiple memory access operations, so we might
4
as well initialize this early.
1
5
6
The changes within tcg/ are entirely mechanical:
7
8
sed -i s/TARGET_PAGE_BITS/s->page_bits/g
9
sed -i s/TARGET_PAGE_MASK/s->page_mask/g
10
11
Reviewed-by: Anton Johansson <anjo@rev.ng>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
14
include/tcg/tcg.h | 5 +++++
15
accel/tcg/translate-all.c | 4 ++++
16
tcg/aarch64/tcg-target.c.inc | 6 +++---
17
tcg/arm/tcg-target.c.inc | 10 +++++-----
18
tcg/i386/tcg-target.c.inc | 6 +++---
19
tcg/loongarch64/tcg-target.c.inc | 4 ++--
20
tcg/mips/tcg-target.c.inc | 6 +++---
21
tcg/ppc/tcg-target.c.inc | 14 +++++++-------
22
tcg/riscv/tcg-target.c.inc | 4 ++--
23
tcg/s390x/tcg-target.c.inc | 4 ++--
24
tcg/sparc64/tcg-target.c.inc | 4 ++--
25
11 files changed, 38 insertions(+), 29 deletions(-)
26
27
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/include/tcg/tcg.h
30
+++ b/include/tcg/tcg.h
31
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
32
int nb_ops;
33
TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */
34
35
+#ifdef CONFIG_SOFTMMU
36
+ int page_mask;
37
+ uint8_t page_bits;
38
+#endif
39
+
40
TCGRegSet reserved_regs;
41
intptr_t current_frame_offset;
42
intptr_t frame_start;
43
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/accel/tcg/translate-all.c
46
+++ b/accel/tcg/translate-all.c
47
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
48
tb_set_page_addr1(tb, -1);
49
tcg_ctx->gen_tb = tb;
50
tcg_ctx->addr_type = TCG_TYPE_TL;
51
+#ifdef CONFIG_SOFTMMU
52
+ tcg_ctx->page_bits = TARGET_PAGE_BITS;
53
+ tcg_ctx->page_mask = TARGET_PAGE_MASK;
54
+#endif
55
56
tb_overflow:
57
58
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
59
index XXXXXXX..XXXXXXX 100644
60
--- a/tcg/aarch64/tcg-target.c.inc
61
+++ b/tcg/aarch64/tcg-target.c.inc
62
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
63
ldst->oi = oi;
64
ldst->addrlo_reg = addr_reg;
65
66
- mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
67
+ mask_type = (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32
68
? TCG_TYPE_I64 : TCG_TYPE_I32);
69
70
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
71
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
72
/* Extract the TLB index from the address into X0. */
73
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
74
TCG_REG_X0, TCG_REG_X0, addr_reg,
75
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
76
+ s->page_bits - CPU_TLB_ENTRY_BITS);
77
78
/* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
79
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
80
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
81
TCG_REG_X3, addr_reg, s_mask - a_mask);
82
x3 = TCG_REG_X3;
83
}
84
- compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
85
+ compare_mask = (uint64_t)s->page_mask | a_mask;
86
87
/* Store the page mask part of the address into X3. */
88
tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,

/* Extract the tlb index from the address into R0. */
tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
- SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
+ SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS));

/*
* Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
addrlo, s_mask - a_mask);
}
- if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
+ if (use_armv7_instructions && s->page_bits <= 16) {
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask));
tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
t_addr, TCG_REG_TMP, 0);
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
}
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
- SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+ SHIFT_IMM_LSR(s->page_bits));
tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
0, TCG_REG_R2, TCG_REG_TMP,
- SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+ SHIFT_IMM_LSL(s->page_bits));
}

if (s->addr_type != TCG_TYPE_I32) {
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
hrexw = P_REXW;
- if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
+ if (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32) {
tlbtype = TCG_TYPE_I64;
tlbrexw = P_REXW;
}
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,

tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ s->page_bits - CPU_TLB_ENTRY_BITS);

tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
TLB_MASK_TABLE_OFS(mem_index) +
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
addrlo, s_mask - a_mask);
}
- tlb_mask = TARGET_PAGE_MASK | a_mask;
+ tlb_mask = s->page_mask | a_mask;
tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);

/* cmp 0(TCG_REG_L0), TCG_REG_L1 */
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);

tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ s->page_bits - CPU_TLB_ENTRY_BITS);
tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);

@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
}
tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
- a_bits, TARGET_PAGE_BITS - 1);
+ a_bits, s->page_bits - 1);

/* Compare masked address with the TLB entry. */
ldst->label_ptr[0] = s->code_ptr;
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* Extract the TLB index from the address into TMP3. */
if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ s->page_bits - CPU_TLB_ENTRY_BITS);
} else {
tcg_out_dsrl(s, TCG_TMP3, addrlo,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ s->page_bits - CPU_TLB_ENTRY_BITS);
}
tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);

@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
* For unaligned accesses, compare against the end of the access to
* verify that it does not cross a page boundary.
*/
- tcg_out_movi(s, addr_type, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
+ tcg_out_movi(s, addr_type, TCG_TMP1, s->page_mask | a_mask);
if (a_mask < s_mask) {
if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* Extract the page index, shifted into place for tlb index. */
if (TCG_TARGET_REG_BITS == 32) {
tcg_out_shri32(s, TCG_REG_R0, addrlo,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ s->page_bits - CPU_TLB_ENTRY_BITS);
} else {
tcg_out_shri64(s, TCG_REG_R0, addrlo,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ s->page_bits - CPU_TLB_ENTRY_BITS);
}
tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));

@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
a_bits = s_bits;
}
tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
+ (32 - a_bits) & 31, 31 - s->page_bits);
} else {
TCGReg t = addrlo;

@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
/* Mask the address for the requested alignment. */
if (TARGET_LONG_BITS == 32) {
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
+ (32 - a_bits) & 31, 31 - s->page_bits);
} else if (a_bits == 0) {
- tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
} else {
tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
- 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
- tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+ 64 - s->page_bits, s->page_bits - a_bits);
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
}
}

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);

tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ s->page_bits - CPU_TLB_ENTRY_BITS);
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);

@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
addr_adj, addr_reg, s_mask - a_mask);
}
- compare_mask = TARGET_PAGE_MASK | a_mask;
+ compare_mask = s->page_mask | a_mask;
if (compare_mask == sextreg(compare_mask, 0, 12)) {
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
} else {
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->addrlo_reg = addr_reg;

tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ s->page_bits - CPU_TLB_ENTRY_BITS);

QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
* cross pages using the address of the last byte of the access.
*/
a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
- tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
+ tlb_mask = (uint64_t)s->page_mask | a_mask;
if (a_off == 0) {
tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
} else {
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,

/* Extract the page index, shifted into place for tlb index. */
tcg_out_arithi(s, TCG_REG_T1, addr_reg,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
+ s->page_bits - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T2, ARITH_AND);

/* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
h->base = TCG_REG_T1;

/* Mask out the page offset, except for the required alignment. */
- compare_mask = TARGET_PAGE_MASK | a_mask;
+ compare_mask = s->page_mask | a_mask;
if (check_fit_tl(compare_mask, 13)) {
tcg_out_arithi(s, TCG_REG_T3, addr_reg, compare_mask, ARITH_AND);
} else {
--
2.34.1
New patch
Disconnect guest tlb parameters from TCG compilation.

Reviewed-by: Anton Johansson <anjo@rev.ng>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg.h | 1 +
accel/tcg/translate-all.c | 1 +
tcg/aarch64/tcg-target.c.inc | 2 +-
tcg/i386/tcg-target.c.inc | 2 +-
4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
#ifdef CONFIG_SOFTMMU
int page_mask;
uint8_t page_bits;
+ uint8_t tlb_dyn_max_bits;
#endif

TCGRegSet reserved_regs;
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
#ifdef CONFIG_SOFTMMU
tcg_ctx->page_bits = TARGET_PAGE_BITS;
tcg_ctx->page_mask = TARGET_PAGE_MASK;
+ tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
#endif

tb_overflow:
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;

- mask_type = (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32
+ mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
? TCG_TYPE_I64 : TCG_TYPE_I32);

/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
hrexw = P_REXW;
- if (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32) {
+ if (s->page_bits + s->tlb_dyn_max_bits > 32) {
tlbtype = TCG_TYPE_I64;
tlbrexw = P_REXW;
}
--
2.34.1
New patch
TCG will need this declaration, without all of the other
bits that come with cpu-all.h.

Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu-all.h | 5 +----
include/exec/user/guest-base.h | 12 ++++++++++++
tcg/tcg.c | 3 +++
3 files changed, 16 insertions(+), 4 deletions(-)
create mode 100644 include/exec/user/guest-base.h

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -XXX,XX +XXX,XX @@

#if defined(CONFIG_USER_ONLY)
#include "exec/user/abitypes.h"
+#include "exec/user/guest-base.h"

-/* On some host systems the guest address space is reserved on the host.
- * This allows the guest address space to be offset to a convenient location.
- */
-extern uintptr_t guest_base;
extern bool have_guest_base;

/*
diff --git a/include/exec/user/guest-base.h b/include/exec/user/guest-base.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/include/exec/user/guest-base.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Declaration of guest_base.
+ * Copyright (c) 2003 Fabrice Bellard
+ */
+
+#ifndef EXEC_USER_GUEST_BASE_H
+#define EXEC_USER_GUEST_BASE_H
+
+extern uintptr_t guest_base;
+
+#endif
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
+#ifdef CONFIG_USER_ONLY
+#include "exec/user/guest-base.h"
+#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
used here. */
--
2.34.1