The following changes since commit 8844bb8d896595ee1d25d21c770e6e6f29803097:

  Merge tag 'or1k-pull-request-20230513' of https://github.com/stffrdhrn/qemu into staging (2023-05-13 11:23:14 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230516

for you to fetch changes up to ee95d036bf4bfa10be65325a287bf3d0e8b2a0e6:

  tcg: Split out exec/user/guest-base.h (2023-05-16 08:11:53 -0700)

----------------------------------------------------------------
tcg/i386: Fix tcg_out_addi_ptr for win64
tcg: Implement atomicity for TCGv_i128
tcg: First quarter of cleanups for building tcg once

----------------------------------------------------------------
Richard Henderson (80):
      tcg/i386: Set P_REXW in tcg_out_addi_ptr
      include/exec/memop: Add MO_ATOM_*
      accel/tcg: Honor atomicity of loads
      accel/tcg: Honor atomicity of stores
      tcg: Unify helper_{be,le}_{ld,st}*
      accel/tcg: Implement helper_{ld,st}*_mmu for user-only
      tcg/tci: Use helper_{ld,st}*_mmu for user-only
      tcg: Add 128-bit guest memory primitives
      meson: Detect atomic128 support with optimization
      tcg/i386: Add have_atomic16
      tcg/aarch64: Detect have_lse, have_lse2 for linux
      tcg/aarch64: Detect have_lse, have_lse2 for darwin
      tcg/i386: Use full load/store helpers in user-only mode
      tcg/aarch64: Use full load/store helpers in user-only mode
      tcg/ppc: Use full load/store helpers in user-only mode
      tcg/loongarch64: Use full load/store helpers in user-only mode
      tcg/riscv: Use full load/store helpers in user-only mode
      tcg/arm: Adjust constraints on qemu_ld/st
      tcg/arm: Use full load/store helpers in user-only mode
      tcg/mips: Use full load/store helpers in user-only mode
      tcg/s390x: Use full load/store helpers in user-only mode
      tcg/sparc64: Allocate %g2 as a third temporary
      tcg/sparc64: Rename tcg_out_movi_imm13 to tcg_out_movi_s13
      target/sparc64: Remove tcg_out_movi_s13 case from tcg_out_movi_imm32
      tcg/sparc64: Rename tcg_out_movi_imm32 to tcg_out_movi_u32
      tcg/sparc64: Split out tcg_out_movi_s32
      tcg/sparc64: Use standard slow path for softmmu
      accel/tcg: Remove helper_unaligned_{ld,st}
      tcg/loongarch64: Check the host supports unaligned accesses
      tcg/loongarch64: Support softmmu unaligned accesses
      tcg/riscv: Support softmmu unaligned accesses
      tcg: Introduce tcg_target_has_memory_bswap
      tcg: Add INDEX_op_qemu_{ld,st}_i128
      tcg: Introduce tcg_out_movext3
      tcg: Merge tcg_out_helper_load_regs into caller
      tcg: Support TCG_TYPE_I128 in tcg_out_{ld,st}_helper_{args,ret}
      tcg: Introduce atom_and_align_for_opc
      tcg/i386: Use atom_and_align_for_opc
      tcg/aarch64: Use atom_and_align_for_opc
      tcg/arm: Use atom_and_align_for_opc
      tcg/loongarch64: Use atom_and_align_for_opc
      tcg/mips: Use atom_and_align_for_opc
      tcg/ppc: Use atom_and_align_for_opc
      tcg/riscv: Use atom_and_align_for_opc
      tcg/s390x: Use atom_and_align_for_opc
      tcg/sparc64: Use atom_and_align_for_opc
      tcg/i386: Honor 64-bit atomicity in 32-bit mode
      tcg/i386: Support 128-bit load/store with have_atomic16
      tcg/aarch64: Rename temporaries
      tcg/aarch64: Support 128-bit load/store
      tcg/ppc: Support 128-bit load/store
      tcg/s390x: Support 128-bit load/store
      tcg: Split out memory ops to tcg-op-ldst.c
      tcg: Widen gen_insn_data to uint64_t
      accel/tcg: Widen tcg-ldst.h addresses to uint64_t
      tcg: Widen helper_{ld,st}_i128 addresses to uint64_t
      tcg: Widen helper_atomic_* addresses to uint64_t
      tcg: Widen tcg_gen_code pc_start argument to uint64_t
      accel/tcg: Merge gen_mem_wrapped with plugin_gen_empty_mem_callback
      accel/tcg: Merge do_gen_mem_cb into caller
      tcg: Reduce copies for plugin_gen_mem_callbacks
      accel/tcg: Widen plugin_gen_empty_mem_callback to i64
      tcg: Add addr_type to TCGContext
      tcg: Remove TCGv from tcg_gen_qemu_{ld,st}_*
      tcg: Remove TCGv from tcg_gen_atomic_*
      tcg: Split INDEX_op_qemu_{ld,st}* for guest address size
      tcg/tci: Elimnate TARGET_LONG_BITS, target_ulong
      tcg/i386: Always enable TCG_TARGET_HAS_extr[lh]_i64_i32
      tcg/i386: Conditionalize tcg_out_extu_i32_i64
      tcg/i386: Adjust type of tlb_mask
      tcg/i386: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/arm: Remove TARGET_LONG_BITS
      tcg/aarch64: Remove USE_GUEST_BASE
      tcg/aarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/loongarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/mips: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg: Add page_bits and page_mask to TCGContext
      tcg: Add tlb_dyn_max_bits to TCGContext
      tcg: Split out exec/user/guest-base.h

 docs/devel/loads-stores.rst      |   36 +-
 docs/devel/tcg-ops.rst           |   11 +-
 meson.build                      |   52 +-
 accel/tcg/tcg-runtime.h          |   49 +-
 include/exec/cpu-all.h           |    5 +-
 include/exec/memop.h             |   37 ++
 include/exec/plugin-gen.h        |    4 +-
 include/exec/user/guest-base.h   |   12 +
 include/qemu/cpuid.h             |   18 +
 include/tcg/tcg-ldst.h           |   72 +--
 include/tcg/tcg-op.h             |  273 ++++++---
 include/tcg/tcg-opc.h            |   41 +-
 include/tcg/tcg.h                |   39 +-
 tcg/aarch64/tcg-target-con-set.h |    2 +
 tcg/aarch64/tcg-target.h         |   15 +-
 tcg/arm/tcg-target-con-set.h     |   16 +-
 tcg/arm/tcg-target-con-str.h     |    5 +-
 tcg/arm/tcg-target.h             |    3 +-
 tcg/i386/tcg-target.h            |   13 +-
 tcg/loongarch64/tcg-target.h     |    3 +-
 tcg/mips/tcg-target.h            |    4 +-
 tcg/ppc/tcg-target-con-set.h     |    2 +
 tcg/ppc/tcg-target-con-str.h     |    1 +
 tcg/ppc/tcg-target.h             |    4 +-
 tcg/riscv/tcg-target.h           |    4 +-
 tcg/s390x/tcg-target-con-set.h   |    2 +
 tcg/s390x/tcg-target.h           |    4 +-
 tcg/sparc64/tcg-target-con-set.h |    2 -
 tcg/sparc64/tcg-target-con-str.h |    1 -
 tcg/sparc64/tcg-target.h         |    4 +-
 tcg/tcg-internal.h               |    2 +
 tcg/tci/tcg-target.h             |    4 +-
 accel/tcg/cputlb.c               |  839 ++++++++++++++++---------
 accel/tcg/plugin-gen.c           |   68 +-
 accel/tcg/translate-all.c        |   35 +-
 accel/tcg/user-exec.c            |  488 ++++++++++-----
 tcg/optimize.c                   |   19 +-
 tcg/tcg-op-ldst.c                | 1234 +++++++++++++++++++++++++++++++++++++
 tcg/tcg-op.c                     |  864 --------------------------
 tcg/tcg.c                        |  627 +++++++++++++++----
 tcg/tci.c                        |  243 +++-----
 accel/tcg/atomic_common.c.inc    |   14 +-
 accel/tcg/ldst_atomicity.c.inc   | 1262 ++++++++++++++++++++++++++++++++++++++
 tcg/aarch64/tcg-target.c.inc     |  438 ++++++++-----
 tcg/arm/tcg-target.c.inc         |  246 +++-----
 tcg/i386/tcg-target.c.inc        |  467 ++++++++++----
 tcg/loongarch64/tcg-target.c.inc |  123 ++--
 tcg/mips/tcg-target.c.inc        |  216 +++----
 tcg/ppc/tcg-target.c.inc         |  300 +++++----
 tcg/riscv/tcg-target.c.inc       |  161 ++---
 tcg/s390x/tcg-target.c.inc       |  207 ++++---
 tcg/sparc64/tcg-target.c.inc     |  731 ++++++++--------------
 tcg/tci/tcg-target.c.inc         |   58 +-
 tcg/meson.build                  |    1 +
 54 files changed, 5988 insertions(+), 3393 deletions(-)
 create mode 100644 include/exec/user/guest-base.h
 create mode 100644 tcg/tcg-op-ldst.c
 create mode 100644 accel/tcg/ldst_atomicity.c.inc
The REXW bit must be set to produce a 64-bit pointer result; the
bit is disabled in 32-bit mode, so we can do this unconditionally.

Fixes: 7d9e1ee424b0 ("tcg/i386: Adjust assert in tcg_out_addi_ptr")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1592
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1642
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

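[Illustration only, not part of the patch; the values below are hypothetical.
Without P_REXW the LEA is emitted with 32-bit operand size, so only the low
32 bits of the computed pointer survive:]

    #include <stdbool.h>
    #include <stdint.h>

    static uint64_t addi_ptr_result(bool with_rexw)
    {
        uint64_t rs = 0x00007ff6a1b20000;           /* e.g. a win64 host address above 4GiB */
        int32_t imm = 0x10;
        return with_rexw ? rs + imm                 /* OPC_LEA | P_REXW: full 64-bit result */
                         : (uint32_t)(rs + imm);    /* OPC_LEA alone: 0xa1b20010, high bits lost */
    }
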
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
 {
     /* This function is only used for passing structs by reference. */
     tcg_debug_assert(imm == (int32_t)imm);
-    tcg_out_modrm_offset(s, OPC_LEA, rd, rs, imm);
+    tcg_out_modrm_offset(s, OPC_LEA | P_REXW, rd, rs, imm);
 }

 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
-- 
2.34.1
This field may be used to describe the precise atomicity requirements
of the guest, which may then be used to constrain the methods by which
it may be emulated by the host.

For instance, the AArch64 LDP (32-bit) instruction changes semantics
with ARMv8.4 LSE2, from

   MO_64 | MO_ATOM_IFALIGN_PAIR
   (64-bits, single-copy atomic only on 4 byte units,
    nonatomic if not aligned by 4),

to

   MO_64 | MO_ATOM_WITHIN16
   (64-bits, single-copy atomic within a 16 byte block)

The former may be implemented with two 4 byte loads, or a single 8 byte
load if that happens to be efficient on the host.  The latter may not
be implemented with two 4 byte loads and may also require a helper when
misaligned.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/memop.h | 37 +++++++++++++++++++++++++++++++++++++
 tcg/tcg.c            | 27 +++++++++++++++++++++------
 2 files changed, 58 insertions(+), 6 deletions(-)

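[Illustration only: the example above from the commit message, written out.
The MO_* constants are the ones added by this patch, but ldp_memop is a
hypothetical helper, not code from the series.]

    /* Choose the MemOp for an LDP-style 2x32-bit access. */
    static MemOp ldp_memop(bool have_lse2)
    {
        /*
         * Pre-LSE2: a pair of 4-byte single-copy-atomic halves.
         * With LSE2: one 8-byte access, atomic while within a 16-byte block.
         */
        return MO_64 | (have_lse2 ? MO_ATOM_WITHIN16 : MO_ATOM_IFALIGN_PAIR);
    }
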
diff --git a/include/exec/memop.h b/include/exec/memop.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/memop.h
+++ b/include/exec/memop.h
@@ -XXX,XX +XXX,XX @@ typedef enum MemOp {
     MO_ALIGN_64 = 6 << MO_ASHIFT,
     MO_ALIGN    = MO_AMASK,

+    /*
+     * MO_ATOM_* describes the atomicity requirements of the operation:
+     * MO_ATOM_IFALIGN: the operation must be single-copy atomic if it
+     *    is aligned; if unaligned there is no atomicity.
+     * MO_ATOM_IFALIGN_PAIR: the entire operation may be considered to
+     *    be a pair of half-sized operations which are packed together
+     *    for convenience, with single-copy atomicity on each half if
+     *    the half is aligned.
+     *    This is the atomicity e.g. of Arm pre-FEAT_LSE2 LDP.
+     * MO_ATOM_WITHIN16: the operation is single-copy atomic, even if it
+     *    is unaligned, so long as it does not cross a 16-byte boundary;
+     *    if it crosses a 16-byte boundary there is no atomicity.
+     *    This is the atomicity e.g. of Arm FEAT_LSE2 LDR.
+     * MO_ATOM_WITHIN16_PAIR: the entire operation is single-copy atomic,
+     *    if it happens to be within a 16-byte boundary, otherwise it
+     *    devolves to a pair of half-sized MO_ATOM_WITHIN16 operations.
+     *    Depending on alignment, one or both will be single-copy atomic.
+     *    This is the atomicity e.g. of Arm FEAT_LSE2 LDP.
+     * MO_ATOM_SUBALIGN: the operation is single-copy atomic by parts
+     *    by the alignment.  E.g. if the address is 0 mod 4, then each
+     *    4-byte subobject is single-copy atomic.
+     *    This is the atomicity e.g. of IBM Power.
+     * MO_ATOM_NONE: the operation has no atomicity requirements.
+     *
+     * Note the default (i.e. 0) value is single-copy atomic to the
+     * size of the operation, if aligned.  This retains the behaviour
+     * from before this field was introduced.
+     */
+    MO_ATOM_SHIFT         = 8,
+    MO_ATOM_IFALIGN       = 0 << MO_ATOM_SHIFT,
+    MO_ATOM_IFALIGN_PAIR  = 1 << MO_ATOM_SHIFT,
+    MO_ATOM_WITHIN16      = 2 << MO_ATOM_SHIFT,
+    MO_ATOM_WITHIN16_PAIR = 3 << MO_ATOM_SHIFT,
+    MO_ATOM_SUBALIGN      = 4 << MO_ATOM_SHIFT,
+    MO_ATOM_NONE          = 5 << MO_ATOM_SHIFT,
+    MO_ATOM_MASK          = 7 << MO_ATOM_SHIFT,
+
     /* Combinations of the above, for ease of use. */
     MO_UB    = MO_8,
     MO_UW    = MO_16,
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
 };

+static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
+    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
+    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
+    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
+    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
+    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
+    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
+};
+
 static const char bswap_flag_name[][6] = {
     [TCG_BSWAP_IZ] = "iz",
     [TCG_BSWAP_OZ] = "oz",
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
         case INDEX_op_qemu_ld_i64:
         case INDEX_op_qemu_st_i64:
             {
+                const char *s_al, *s_op, *s_at;
                 MemOpIdx oi = op->args[k++];
                 MemOp op = get_memop(oi);
                 unsigned ix = get_mmuidx(oi);

-                if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
-                    col += ne_fprintf(f, ",$0x%x,%u", op, ix);
+                s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
+                s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
+                s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
+                op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
+
+                /* If all fields are accounted for, print symbolically. */
+                if (!op && s_al && s_op && s_at) {
+                    col += ne_fprintf(f, ",%s%s%s,%u",
+                                      s_at, s_al, s_op, ix);
                 } else {
-                    const char *s_al, *s_op;
-                    s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
-                    s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
-                    col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
+                    op = get_memop(oi);
+                    col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                 }
                 i = 1;
             }
-- 
2.34.1
Create ldst_atomicity.c.inc.

Not required for user-only code loads, because we've ensured that
the page is read-only before beginning to translate code.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c             | 175 +++++++---
 accel/tcg/user-exec.c          |  26 +-
 accel/tcg/ldst_atomicity.c.inc | 566 +++++++++++++++++++++++++++++++++
 3 files changed, 716 insertions(+), 51 deletions(-)
 create mode 100644 accel/tcg/ldst_atomicity.c.inc

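[Illustration only, not part of the patch: the recurring trick in the new
ldst_atomicity.c.inc is to widen a misaligned access to a pair of aligned,
individually single-copy-atomic loads and extract the wanted bytes. A
simplified sketch of that idea follows; it assumes a little-endian host and
a hypothetical function name, while the real load_atom_extract_al4x2 below
also handles big-endian hosts.]

    /* Load 4 bytes from a possibly misaligned @pv using two aligned loads. */
    static uint32_t load_4_from_aligned_pair(void *pv)
    {
        uintptr_t pi = (uintptr_t)pv;
        int sh = (pi & 3) * 8;                       /* misalignment, in bits */
        uint32_t *p = (uint32_t *)(pi & ~(uintptr_t)3);
        uint32_t a = qatomic_read(p);                /* each load is 4-aligned ... */
        uint32_t b = qatomic_read(p + 1);            /* ... and single-copy atomic */
        return sh ? (a >> sh) | (b << (32 - sh)) : a;
    }
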
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
15
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
17
--- a/accel/tcg/cputlb.c
15
+++ b/accel/tcg/cputlb.c
18
+++ b/accel/tcg/cputlb.c
16
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
19
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
17
desc->window_max_entries = max_entries;
20
return qemu_ram_addr_from_host_nofail(p);
18
}
21
}
19
22
20
-static void tlb_dyn_init(CPUArchState *env)
23
+/* Load/store with atomicity primitives. */
24
+#include "ldst_atomicity.c.inc"
25
+
26
#ifdef CONFIG_PLUGIN
27
/*
28
* Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
29
@@ -XXX,XX +XXX,XX @@ static void validate_memop(MemOpIdx oi, MemOp expected)
30
* specifically for reading instructions from system memory. It is
31
* called by the translation loop and in some helpers where the code
32
* is disassembled. It shouldn't be called directly by guest code.
33
- */
34
-
35
-typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
36
- MemOpIdx oi, uintptr_t retaddr);
37
-
38
-static inline uint64_t QEMU_ALWAYS_INLINE
39
-load_memop(const void *haddr, MemOp op)
21
-{
40
-{
22
- int i;
41
- switch (op) {
23
-
42
- case MO_UB:
24
- for (i = 0; i < NB_MMU_MODES; i++) {
43
- return ldub_p(haddr);
25
- CPUTLBDesc *desc = &env_tlb(env)->d[i];
44
- case MO_BEUW:
26
- size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
45
- return lduw_be_p(haddr);
27
-
46
- case MO_LEUW:
28
- tlb_window_reset(desc, get_clock_realtime(), 0);
47
- return lduw_le_p(haddr);
29
- desc->n_used_entries = 0;
48
- case MO_BEUL:
30
- env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
49
- return (uint32_t)ldl_be_p(haddr);
31
- env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
50
- case MO_LEUL:
32
- env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
51
- return (uint32_t)ldl_le_p(haddr);
52
- case MO_BEUQ:
53
- return ldq_be_p(haddr);
54
- case MO_LEUQ:
55
- return ldq_le_p(haddr);
56
- default:
57
- qemu_build_not_reached();
33
- }
58
- }
34
-}
59
-}
35
-
60
-
36
/**
61
-/*
37
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
62
+ *
38
* @desc: The CPUTLBDesc portion of the TLB
63
* For the benefit of TCG generated code, we want to avoid the
39
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
64
* complication of ABI-specific return type promotion and always
40
tlb_mmu_flush_locked(desc, fast);
65
* return a value extended to the register size of the host. This is
66
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
67
return ret_be;
41
}
68
}
42
69
43
+static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
70
+/**
44
+{
71
+ * do_ld_parts_beN
45
+ size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
72
+ * @p: translation parameters
46
+
73
+ * @ret_be: accumulated data
47
+ tlb_window_reset(desc, now, 0);
74
+ *
48
+ desc->n_used_entries = 0;
75
+ * As do_ld_bytes_beN, but atomically on each aligned part.
49
+ fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
76
+ */
50
+ fast->table = g_new(CPUTLBEntry, n_entries);
77
+static uint64_t do_ld_parts_beN(MMULookupPageData *p, uint64_t ret_be)
51
+ desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
78
+{
52
+}
79
+ void *haddr = p->haddr;
53
+
80
+ int size = p->size;
54
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
81
+
82
+ do {
83
+ uint64_t x;
84
+ int n;
85
+
86
+ /*
87
+ * Find minimum of alignment and size.
88
+ * This is slightly stronger than required by MO_ATOM_SUBALIGN, which
89
+ * would have only checked the low bits of addr|size once at the start,
90
+ * but is just as easy.
91
+ */
92
+ switch (((uintptr_t)haddr | size) & 7) {
93
+ case 4:
94
+ x = cpu_to_be32(load_atomic4(haddr));
95
+ ret_be = (ret_be << 32) | x;
96
+ n = 4;
97
+ break;
98
+ case 2:
99
+ case 6:
100
+ x = cpu_to_be16(load_atomic2(haddr));
101
+ ret_be = (ret_be << 16) | x;
102
+ n = 2;
103
+ break;
104
+ default:
105
+ x = *(uint8_t *)haddr;
106
+ ret_be = (ret_be << 8) | x;
107
+ n = 1;
108
+ break;
109
+ case 0:
110
+ g_assert_not_reached();
111
+ }
112
+ haddr += n;
113
+ size -= n;
114
+ } while (size != 0);
115
+ return ret_be;
116
+}
117
+
118
+/**
119
+ * do_ld_parts_be4
120
+ * @p: translation parameters
121
+ * @ret_be: accumulated data
122
+ *
123
+ * As do_ld_bytes_beN, but with one atomic load.
124
+ * Four aligned bytes are guaranteed to cover the load.
125
+ */
126
+static uint64_t do_ld_whole_be4(MMULookupPageData *p, uint64_t ret_be)
127
+{
128
+ int o = p->addr & 3;
129
+ uint32_t x = load_atomic4(p->haddr - o);
130
+
131
+ x = cpu_to_be32(x);
132
+ x <<= o * 8;
133
+ x >>= (4 - p->size) * 8;
134
+ return (ret_be << (p->size * 8)) | x;
135
+}
136
+
137
+/**
138
+ * do_ld_parts_be8
139
+ * @p: translation parameters
140
+ * @ret_be: accumulated data
141
+ *
142
+ * As do_ld_bytes_beN, but with one atomic load.
143
+ * Eight aligned bytes are guaranteed to cover the load.
144
+ */
145
+static uint64_t do_ld_whole_be8(CPUArchState *env, uintptr_t ra,
146
+ MMULookupPageData *p, uint64_t ret_be)
147
+{
148
+ int o = p->addr & 7;
149
+ uint64_t x = load_atomic8_or_exit(env, ra, p->haddr - o);
150
+
151
+ x = cpu_to_be64(x);
152
+ x <<= o * 8;
153
+ x >>= (8 - p->size) * 8;
154
+ return (ret_be << (p->size * 8)) | x;
155
+}
156
+
157
/*
158
* Wrapper for the above.
159
*/
160
static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
161
- uint64_t ret_be, int mmu_idx,
162
- MMUAccessType type, uintptr_t ra)
163
+ uint64_t ret_be, int mmu_idx, MMUAccessType type,
164
+ MemOp mop, uintptr_t ra)
55
{
165
{
56
env_tlb(env)->d[mmu_idx].n_used_entries++;
166
+ MemOp atom;
57
@@ -XXX,XX +XXX,XX @@ static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
167
+ unsigned tmp, half_size;
58
void tlb_init(CPUState *cpu)
168
+
169
if (unlikely(p->flags & TLB_MMIO)) {
170
return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra);
171
- } else {
172
+ }
173
+
174
+ /*
175
+ * It is a given that we cross a page and therefore there is no
176
+ * atomicity for the load as a whole, but subobjects may need attention.
177
+ */
178
+ atom = mop & MO_ATOM_MASK;
179
+ switch (atom) {
180
+ case MO_ATOM_SUBALIGN:
181
+ return do_ld_parts_beN(p, ret_be);
182
+
183
+ case MO_ATOM_IFALIGN_PAIR:
184
+ case MO_ATOM_WITHIN16_PAIR:
185
+ tmp = mop & MO_SIZE;
186
+ tmp = tmp ? tmp - 1 : 0;
187
+ half_size = 1 << tmp;
188
+ if (atom == MO_ATOM_IFALIGN_PAIR
189
+ ? p->size == half_size
190
+ : p->size >= half_size) {
191
+ if (!HAVE_al8_fast && p->size < 4) {
192
+ return do_ld_whole_be4(p, ret_be);
193
+ } else {
194
+ return do_ld_whole_be8(env, ra, p, ret_be);
195
+ }
196
+ }
197
+ /* fall through */
198
+
199
+ case MO_ATOM_IFALIGN:
200
+ case MO_ATOM_WITHIN16:
201
+ case MO_ATOM_NONE:
202
return do_ld_bytes_beN(p, ret_be);
203
+
204
+ default:
205
+ g_assert_not_reached();
206
}
207
}
208
209
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
210
}
211
212
/* Perform the load host endian, then swap if necessary. */
213
- ret = load_memop(p->haddr, MO_UW);
214
+ ret = load_atom_2(env, ra, p->haddr, memop);
215
if (memop & MO_BSWAP) {
216
ret = bswap16(ret);
217
}
218
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
219
}
220
221
/* Perform the load host endian. */
222
- ret = load_memop(p->haddr, MO_UL);
223
+ ret = load_atom_4(env, ra, p->haddr, memop);
224
if (memop & MO_BSWAP) {
225
ret = bswap32(ret);
226
}
227
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
228
}
229
230
/* Perform the load host endian. */
231
- ret = load_memop(p->haddr, MO_UQ);
232
+ ret = load_atom_8(env, ra, p->haddr, memop);
233
if (memop & MO_BSWAP) {
234
ret = bswap64(ret);
235
}
236
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
237
return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
238
}
239
240
- ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
241
- ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
242
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
243
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
244
if ((l.memop & MO_BSWAP) == MO_LE) {
245
ret = bswap32(ret);
246
}
247
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
248
return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
249
}
250
251
- ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
252
- ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
253
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
254
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
255
if ((l.memop & MO_BSWAP) == MO_LE) {
256
ret = bswap64(ret);
257
}
258
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
259
index XXXXXXX..XXXXXXX 100644
260
--- a/accel/tcg/user-exec.c
261
+++ b/accel/tcg/user-exec.c
262
@@ -XXX,XX +XXX,XX @@ static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
263
return ret;
264
}
265
266
+#include "ldst_atomicity.c.inc"
267
+
268
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
269
MemOpIdx oi, uintptr_t ra)
59
{
270
{
60
CPUArchState *env = cpu->env_ptr;
271
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
61
+ int64_t now = get_clock_realtime();
272
62
+ int i;
273
validate_memop(oi, MO_BEUW);
63
274
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
64
qemu_spin_init(&env_tlb(env)->c.lock);
275
- ret = lduw_be_p(haddr);
65
276
+ ret = load_atom_2(env, ra, haddr, get_memop(oi));
66
/* Ensure that cpu_reset performs a full flush. */
277
clear_helper_retaddr();
67
env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
278
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
68
279
- return ret;
69
- tlb_dyn_init(env);
280
+ return cpu_to_be16(ret);
70
+ for (i = 0; i < NB_MMU_MODES; i++) {
71
+ tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
72
+ }
73
}
281
}
74
282
75
/* flush_all_helper: run fn across all cpus
283
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
284
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
285
286
validate_memop(oi, MO_BEUL);
287
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
288
- ret = ldl_be_p(haddr);
289
+ ret = load_atom_4(env, ra, haddr, get_memop(oi));
290
clear_helper_retaddr();
291
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
292
- return ret;
293
+ return cpu_to_be32(ret);
294
}
295
296
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
297
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
298
299
validate_memop(oi, MO_BEUQ);
300
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
301
- ret = ldq_be_p(haddr);
302
+ ret = load_atom_8(env, ra, haddr, get_memop(oi));
303
clear_helper_retaddr();
304
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
305
- return ret;
306
+ return cpu_to_be64(ret);
307
}
308
309
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
310
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
311
312
validate_memop(oi, MO_LEUW);
313
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
314
- ret = lduw_le_p(haddr);
315
+ ret = load_atom_2(env, ra, haddr, get_memop(oi));
316
clear_helper_retaddr();
317
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
318
- return ret;
319
+ return cpu_to_le16(ret);
320
}
321
322
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
323
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
324
325
validate_memop(oi, MO_LEUL);
326
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
327
- ret = ldl_le_p(haddr);
328
+ ret = load_atom_4(env, ra, haddr, get_memop(oi));
329
clear_helper_retaddr();
330
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
331
- return ret;
332
+ return cpu_to_le32(ret);
333
}
334
335
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
336
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
337
338
validate_memop(oi, MO_LEUQ);
339
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
340
- ret = ldq_le_p(haddr);
341
+ ret = load_atom_8(env, ra, haddr, get_memop(oi));
342
clear_helper_retaddr();
343
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
344
- return ret;
345
+ return cpu_to_le64(ret);
346
}
347
348
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
349
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
350
new file mode 100644
351
index XXXXXXX..XXXXXXX
352
--- /dev/null
353
+++ b/accel/tcg/ldst_atomicity.c.inc
354
@@ -XXX,XX +XXX,XX @@
355
+/*
356
+ * Routines common to user and system emulation of load/store.
357
+ *
358
+ * Copyright (c) 2022 Linaro, Ltd.
359
+ *
360
+ * SPDX-License-Identifier: GPL-2.0-or-later
361
+ *
362
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
363
+ * See the COPYING file in the top-level directory.
364
+ */
365
+
366
+#ifdef CONFIG_ATOMIC64
367
+# define HAVE_al8 true
368
+#else
369
+# define HAVE_al8 false
370
+#endif
371
+#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
372
+
373
+#if defined(CONFIG_ATOMIC128)
374
+# define HAVE_al16_fast true
375
+#else
376
+# define HAVE_al16_fast false
377
+#endif
378
+
379
+/**
380
+ * required_atomicity:
381
+ *
382
+ * Return the lg2 bytes of atomicity required by @memop for @p.
383
+ * If the operation must be split into two operations to be
384
+ * examined separately for atomicity, return -lg2.
385
+ */
386
+static int required_atomicity(CPUArchState *env, uintptr_t p, MemOp memop)
387
+{
388
+ MemOp atom = memop & MO_ATOM_MASK;
389
+ MemOp size = memop & MO_SIZE;
390
+ MemOp half = size ? size - 1 : 0;
391
+ unsigned tmp;
392
+ int atmax;
393
+
394
+ switch (atom) {
395
+ case MO_ATOM_NONE:
396
+ atmax = MO_8;
397
+ break;
398
+
399
+ case MO_ATOM_IFALIGN_PAIR:
400
+ size = half;
401
+ /* fall through */
402
+
403
+ case MO_ATOM_IFALIGN:
404
+ tmp = (1 << size) - 1;
405
+ atmax = p & tmp ? MO_8 : size;
406
+ break;
407
+
408
+ case MO_ATOM_WITHIN16:
409
+ tmp = p & 15;
410
+ atmax = (tmp + (1 << size) <= 16 ? size : MO_8);
411
+ break;
412
+
413
+ case MO_ATOM_WITHIN16_PAIR:
414
+ tmp = p & 15;
415
+ if (tmp + (1 << size) <= 16) {
416
+ atmax = size;
417
+ } else if (tmp + (1 << half) == 16) {
418
+ /*
419
+ * The pair exactly straddles the boundary.
420
+ * Both halves are naturally aligned and atomic.
421
+ */
422
+ atmax = half;
423
+ } else {
424
+ /*
425
+ * One of the pair crosses the boundary, and is non-atomic.
426
+ * The other of the pair does not cross, and is atomic.
427
+ */
428
+ atmax = -half;
429
+ }
430
+ break;
431
+
432
+ case MO_ATOM_SUBALIGN:
433
+ /*
434
+ * Examine the alignment of p to determine if there are subobjects
435
+ * that must be aligned. Note that we only really need ctz4() --
436
+ * any more significant bits are discarded by the immediately
437
+ * following comparison.
438
+ */
439
+ tmp = ctz32(p);
440
+ atmax = MIN(size, tmp);
441
+ break;
442
+
443
+ default:
444
+ g_assert_not_reached();
445
+ }
446
+
447
+ /*
448
+ * Here we have the architectural atomicity of the operation.
449
+ * However, when executing in a serial context, we need no extra
450
+ * host atomicity in order to avoid racing. This reduction
451
+ * avoids looping with cpu_loop_exit_atomic.
452
+ */
453
+ if (cpu_in_serial_context(env_cpu(env))) {
454
+ return MO_8;
455
+ }
456
+ return atmax;
457
+}
458
+
459
+/**
460
+ * load_atomic2:
461
+ * @pv: host address
462
+ *
463
+ * Atomically load 2 aligned bytes from @pv.
464
+ */
465
+static inline uint16_t load_atomic2(void *pv)
466
+{
467
+ uint16_t *p = __builtin_assume_aligned(pv, 2);
468
+ return qatomic_read(p);
469
+}
470
+
471
+/**
472
+ * load_atomic4:
473
+ * @pv: host address
474
+ *
475
+ * Atomically load 4 aligned bytes from @pv.
476
+ */
477
+static inline uint32_t load_atomic4(void *pv)
478
+{
479
+ uint32_t *p = __builtin_assume_aligned(pv, 4);
480
+ return qatomic_read(p);
481
+}
482
+
483
+/**
484
+ * load_atomic8:
485
+ * @pv: host address
486
+ *
487
+ * Atomically load 8 aligned bytes from @pv.
488
+ */
489
+static inline uint64_t load_atomic8(void *pv)
490
+{
491
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
492
+
493
+ qemu_build_assert(HAVE_al8);
494
+ return qatomic_read__nocheck(p);
495
+}
496
+
497
+/**
498
+ * load_atomic16:
499
+ * @pv: host address
500
+ *
501
+ * Atomically load 16 aligned bytes from @pv.
502
+ */
503
+static inline Int128 load_atomic16(void *pv)
504
+{
505
+#ifdef CONFIG_ATOMIC128
506
+ __uint128_t *p = __builtin_assume_aligned(pv, 16);
507
+ Int128Alias r;
508
+
509
+ r.u = qatomic_read__nocheck(p);
510
+ return r.s;
511
+#else
512
+ qemu_build_not_reached();
513
+#endif
514
+}
515
+
516
+/**
517
+ * load_atomic8_or_exit:
518
+ * @env: cpu context
519
+ * @ra: host unwind address
520
+ * @pv: host address
521
+ *
522
+ * Atomically load 8 aligned bytes from @pv.
523
+ * If this is not possible, longjmp out to restart serially.
524
+ */
525
+static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
526
+{
527
+ if (HAVE_al8) {
528
+ return load_atomic8(pv);
529
+ }
530
+
531
+#ifdef CONFIG_USER_ONLY
532
+ /*
533
+ * If the page is not writable, then assume the value is immutable
534
+ * and requires no locking. This ignores the case of MAP_SHARED with
535
+ * another process, because the fallback start_exclusive solution
536
+ * provides no protection across processes.
537
+ */
538
+ if (!page_check_range(h2g(pv), 8, PAGE_WRITE)) {
539
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
540
+ return *p;
541
+ }
542
+#endif
543
+
544
+ /* Ultimate fallback: re-execute in serial context. */
545
+ cpu_loop_exit_atomic(env_cpu(env), ra);
546
+}
547
+
548
+/**
549
+ * load_atomic16_or_exit:
550
+ * @env: cpu context
551
+ * @ra: host unwind address
552
+ * @pv: host address
553
+ *
554
+ * Atomically load 16 aligned bytes from @pv.
555
+ * If this is not possible, longjmp out to restart serially.
556
+ */
557
+static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
558
+{
559
+ Int128 *p = __builtin_assume_aligned(pv, 16);
560
+
561
+ if (HAVE_al16_fast) {
562
+ return load_atomic16(p);
563
+ }
564
+
565
+#ifdef CONFIG_USER_ONLY
566
+ /*
567
+ * We can only use cmpxchg to emulate a load if the page is writable.
568
+ * If the page is not writable, then assume the value is immutable
569
+ * and requires no locking. This ignores the case of MAP_SHARED with
570
+ * another process, because the fallback start_exclusive solution
571
+ * provides no protection across processes.
572
+ */
573
+ if (!page_check_range(h2g(p), 16, PAGE_WRITE)) {
574
+ return *p;
575
+ }
576
+#endif
577
+
578
+ /*
579
+ * In system mode all guest pages are writable, and for user-only
580
+ * we have just checked writability. Try cmpxchg.
581
+ */
582
+#if defined(CONFIG_CMPXCHG128)
583
+ /* Swap 0 with 0, with the side-effect of returning the old value. */
584
+ {
585
+ Int128Alias r;
586
+ r.u = __sync_val_compare_and_swap_16((__uint128_t *)p, 0, 0);
587
+ return r.s;
588
+ }
589
+#endif
590
+
591
+ /* Ultimate fallback: re-execute in serial context. */
592
+ cpu_loop_exit_atomic(env_cpu(env), ra);
593
+}
594
+
595
+/**
596
+ * load_atom_extract_al4x2:
597
+ * @pv: host address
598
+ *
599
+ * Load 4 bytes from @p, from two sequential atomic 4-byte loads.
600
+ */
601
+static uint32_t load_atom_extract_al4x2(void *pv)
602
+{
603
+ uintptr_t pi = (uintptr_t)pv;
604
+ int sh = (pi & 3) * 8;
605
+ uint32_t a, b;
606
+
607
+ pv = (void *)(pi & ~3);
608
+ a = load_atomic4(pv);
609
+ b = load_atomic4(pv + 4);
610
+
611
+ if (HOST_BIG_ENDIAN) {
612
+ return (a << sh) | (b >> (-sh & 31));
613
+ } else {
614
+ return (a >> sh) | (b << (-sh & 31));
615
+ }
616
+}
617
+
618
+/**
619
+ * load_atom_extract_al8x2:
620
+ * @pv: host address
621
+ *
622
+ * Load 8 bytes from @p, from two sequential atomic 8-byte loads.
623
+ */
624
+static uint64_t load_atom_extract_al8x2(void *pv)
625
+{
626
+ uintptr_t pi = (uintptr_t)pv;
627
+ int sh = (pi & 7) * 8;
628
+ uint64_t a, b;
629
+
630
+ pv = (void *)(pi & ~7);
631
+ a = load_atomic8(pv);
632
+ b = load_atomic8(pv + 8);
633
+
634
+ if (HOST_BIG_ENDIAN) {
635
+ return (a << sh) | (b >> (-sh & 63));
636
+ } else {
637
+ return (a >> sh) | (b << (-sh & 63));
638
+ }
639
+}
640
+
641
+/**
642
+ * load_atom_extract_al8_or_exit:
643
+ * @env: cpu context
644
+ * @ra: host unwind address
645
+ * @pv: host address
646
+ * @s: object size in bytes, @s <= 4.
647
+ *
648
+ * Atomically load @s bytes from @p, when p % s != 0, and [p, p+s-1] does
649
+ * not cross an 8-byte boundary. This means that we can perform an atomic
650
+ * 8-byte load and extract.
651
+ * The value is returned in the low bits of a uint32_t.
652
+ */
653
+static uint32_t load_atom_extract_al8_or_exit(CPUArchState *env, uintptr_t ra,
654
+ void *pv, int s)
655
+{
656
+ uintptr_t pi = (uintptr_t)pv;
657
+ int o = pi & 7;
658
+ int shr = (HOST_BIG_ENDIAN ? 8 - s - o : o) * 8;
659
+
660
+ pv = (void *)(pi & ~7);
661
+ return load_atomic8_or_exit(env, ra, pv) >> shr;
662
+}
663
+
664
+/**
665
+ * load_atom_extract_al16_or_exit:
666
+ * @env: cpu context
667
+ * @ra: host unwind address
668
+ * @p: host address
669
+ * @s: object size in bytes, @s <= 8.
670
+ *
671
+ * Atomically load @s bytes from @p, when p % 16 < 8
672
+ * and p % 16 + s > 8. I.e. does not cross a 16-byte
673
+ * boundary, but *does* cross an 8-byte boundary.
674
+ * This is the slow version, so we must have eliminated
675
+ * any faster load_atom_extract_al8_or_exit case.
676
+ *
677
+ * If this is not possible, longjmp out to restart serially.
678
+ */
679
+static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
680
+ void *pv, int s)
681
+{
682
+ uintptr_t pi = (uintptr_t)pv;
683
+ int o = pi & 7;
684
+ int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
685
+ Int128 r;
686
+
687
+ /*
688
+ * Note constraints above: p & 8 must be clear.
689
+ * Provoke SIGBUS if possible otherwise.
690
+ */
691
+ pv = (void *)(pi & ~7);
692
+ r = load_atomic16_or_exit(env, ra, pv);
693
+
694
+ r = int128_urshift(r, shr);
695
+ return int128_getlo(r);
696
+}
697
+
698
+/**
699
+ * load_atom_extract_al16_or_al8:
700
+ * @p: host address
701
+ * @s: object size in bytes, @s <= 8.
702
+ *
703
+ * Load @s bytes from @p, when p % s != 0. If [p, p+s-1] does not
704
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
705
+ * otherwise the access must be 8-byte atomic.
706
+ */
707
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
708
+{
709
+#if defined(CONFIG_ATOMIC128)
710
+ uintptr_t pi = (uintptr_t)pv;
711
+ int o = pi & 7;
712
+ int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
713
+ __uint128_t r;
714
+
715
+ pv = (void *)(pi & ~7);
716
+ if (pi & 8) {
717
+ uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
718
+ uint64_t a = qatomic_read__nocheck(p8);
719
+ uint64_t b = qatomic_read__nocheck(p8 + 1);
720
+
721
+ if (HOST_BIG_ENDIAN) {
722
+ r = ((__uint128_t)a << 64) | b;
723
+ } else {
724
+ r = ((__uint128_t)b << 64) | a;
725
+ }
726
+ } else {
727
+ __uint128_t *p16 = __builtin_assume_aligned(pv, 16, 0);
728
+ r = qatomic_read__nocheck(p16);
729
+ }
730
+ return r >> shr;
731
+#else
732
+ qemu_build_not_reached();
733
+#endif
734
+}
735
+
736
+/**
737
+ * load_atom_4_by_2:
738
+ * @pv: host address
739
+ *
740
+ * Load 4 bytes from @pv, with two 2-byte atomic loads.
741
+ */
742
+static inline uint32_t load_atom_4_by_2(void *pv)
743
+{
744
+ uint32_t a = load_atomic2(pv);
745
+ uint32_t b = load_atomic2(pv + 2);
746
+
747
+ if (HOST_BIG_ENDIAN) {
748
+ return (a << 16) | b;
749
+ } else {
750
+ return (b << 16) | a;
751
+ }
752
+}
753
+
754
+/**
755
+ * load_atom_8_by_2:
756
+ * @pv: host address
757
+ *
758
+ * Load 8 bytes from @pv, with four 2-byte atomic loads.
759
+ */
760
+static inline uint64_t load_atom_8_by_2(void *pv)
761
+{
762
+ uint32_t a = load_atom_4_by_2(pv);
763
+ uint32_t b = load_atom_4_by_2(pv + 4);
764
+
765
+ if (HOST_BIG_ENDIAN) {
766
+ return ((uint64_t)a << 32) | b;
767
+ } else {
768
+ return ((uint64_t)b << 32) | a;
769
+ }
770
+}
771
+
772
+/**
773
+ * load_atom_8_by_4:
774
+ * @pv: host address
775
+ *
776
+ * Load 8 bytes from @pv, with two 4-byte atomic loads.
777
+ */
778
+static inline uint64_t load_atom_8_by_4(void *pv)
779
+{
780
+ uint32_t a = load_atomic4(pv);
781
+ uint32_t b = load_atomic4(pv + 4);
782
+
783
+ if (HOST_BIG_ENDIAN) {
784
+ return ((uint64_t)a << 32) | b;
785
+ } else {
786
+ return ((uint64_t)b << 32) | a;
787
+ }
788
+}
789
+
790
+/**
791
+ * load_atom_2:
792
+ * @p: host address
793
+ * @memop: the full memory op
794
+ *
795
+ * Load 2 bytes from @p, honoring the atomicity of @memop.
796
+ */
797
+static uint16_t load_atom_2(CPUArchState *env, uintptr_t ra,
798
+ void *pv, MemOp memop)
799
+{
800
+ uintptr_t pi = (uintptr_t)pv;
801
+ int atmax;
802
+
803
+ if (likely((pi & 1) == 0)) {
804
+ return load_atomic2(pv);
805
+ }
806
+ if (HAVE_al16_fast) {
807
+ return load_atom_extract_al16_or_al8(pv, 2);
808
+ }
809
+
810
+ atmax = required_atomicity(env, pi, memop);
811
+ switch (atmax) {
812
+ case MO_8:
813
+ return lduw_he_p(pv);
814
+ case MO_16:
815
+ /* The only case remaining is MO_ATOM_WITHIN16. */
816
+ if (!HAVE_al8_fast && (pi & 3) == 1) {
817
+ /* Big or little endian, we want the middle two bytes. */
818
+ return load_atomic4(pv - 1) >> 8;
819
+ }
820
+ if ((pi & 15) != 7) {
821
+ return load_atom_extract_al8_or_exit(env, ra, pv, 2);
822
+ }
823
+ return load_atom_extract_al16_or_exit(env, ra, pv, 2);
824
+ default:
825
+ g_assert_not_reached();
826
+ }
827
+}
828
+
829
+/**
830
+ * load_atom_4:
831
+ * @p: host address
832
+ * @memop: the full memory op
833
+ *
834
+ * Load 4 bytes from @p, honoring the atomicity of @memop.
835
+ */
836
+static uint32_t load_atom_4(CPUArchState *env, uintptr_t ra,
837
+ void *pv, MemOp memop)
838
+{
839
+ uintptr_t pi = (uintptr_t)pv;
840
+ int atmax;
841
+
842
+ if (likely((pi & 3) == 0)) {
843
+ return load_atomic4(pv);
844
+ }
845
+ if (HAVE_al16_fast) {
846
+ return load_atom_extract_al16_or_al8(pv, 4);
847
+ }
848
+
849
+ atmax = required_atomicity(env, pi, memop);
850
+ switch (atmax) {
851
+ case MO_8:
852
+ case MO_16:
853
+ case -MO_16:
854
+ /*
855
+ * For MO_ATOM_IFALIGN, this is more atomicity than required,
856
+ * but it's trivially supported on all hosts, better than 4
857
+ * individual byte loads (when the host requires alignment),
858
+ * and overlaps with the MO_ATOM_SUBALIGN case of p % 2 == 0.
859
+ */
860
+ return load_atom_extract_al4x2(pv);
861
+ case MO_32:
862
+ if (!(pi & 4)) {
863
+ return load_atom_extract_al8_or_exit(env, ra, pv, 4);
864
+ }
865
+ return load_atom_extract_al16_or_exit(env, ra, pv, 4);
866
+ default:
867
+ g_assert_not_reached();
868
+ }
869
+}
870
+
871
+/**
872
+ * load_atom_8:
873
+ * @p: host address
874
+ * @memop: the full memory op
875
+ *
876
+ * Load 8 bytes from @p, honoring the atomicity of @memop.
877
+ */
878
+static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
879
+ void *pv, MemOp memop)
880
+{
881
+ uintptr_t pi = (uintptr_t)pv;
882
+ int atmax;
883
+
884
+ /*
885
+ * If the host does not support 8-byte atomics, wait until we have
886
+ * examined the atomicity parameters below.
887
+ */
888
+ if (HAVE_al8 && likely((pi & 7) == 0)) {
889
+ return load_atomic8(pv);
890
+ }
891
+ if (HAVE_al16_fast) {
892
+ return load_atom_extract_al16_or_al8(pv, 8);
893
+ }
894
+
895
+ atmax = required_atomicity(env, pi, memop);
896
+ if (atmax == MO_64) {
897
+ if (!HAVE_al8 && (pi & 7) == 0) {
898
+ load_atomic8_or_exit(env, ra, pv);
899
+ }
900
+ return load_atom_extract_al16_or_exit(env, ra, pv, 8);
901
+ }
902
+ if (HAVE_al8_fast) {
903
+ return load_atom_extract_al8x2(pv);
904
+ }
905
+ switch (atmax) {
906
+ case MO_8:
907
+ return ldq_he_p(pv);
908
+ case MO_16:
909
+ return load_atom_8_by_2(pv);
910
+ case MO_32:
911
+ return load_atom_8_by_4(pv);
912
+ case -MO_32:
913
+ if (HAVE_al8) {
914
+ return load_atom_extract_al8x2(pv);
915
+ }
916
+ cpu_loop_exit_atomic(env_cpu(env), ra);
917
+ default:
918
+ g_assert_not_reached();
919
+ }
920
+}
-- 
2.34.1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c             | 108 ++++----
 accel/tcg/user-exec.c          |  12 +-
 accel/tcg/ldst_atomicity.c.inc | 491 +++++++++++++++++++++++++++++++++
 3 files changed, 545 insertions(+), 66 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
11
--- a/accel/tcg/cputlb.c
15
+++ b/accel/tcg/cputlb.c
12
+++ b/accel/tcg/cputlb.c
16
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
13
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
17
14
* Store Helpers
15
*/
16
17
-static inline void QEMU_ALWAYS_INLINE
18
-store_memop(void *haddr, uint64_t val, MemOp op)
19
-{
20
- switch (op) {
21
- case MO_UB:
22
- stb_p(haddr, val);
23
- break;
24
- case MO_BEUW:
25
- stw_be_p(haddr, val);
26
- break;
27
- case MO_LEUW:
28
- stw_le_p(haddr, val);
29
- break;
30
- case MO_BEUL:
31
- stl_be_p(haddr, val);
32
- break;
33
- case MO_LEUL:
34
- stl_le_p(haddr, val);
35
- break;
36
- case MO_BEUQ:
37
- stq_be_p(haddr, val);
38
- break;
39
- case MO_LEUQ:
40
- stq_le_p(haddr, val);
41
- break;
42
- default:
43
- qemu_build_not_reached();
44
- }
45
-}
46
-
18
/**
47
/**
19
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
48
* do_st_mmio_leN:
20
- * @env: CPU that owns the TLB
49
* @env: cpu context
21
- * @mmu_idx: MMU index of the TLB
50
@@ -XXX,XX +XXX,XX @@ static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
22
+ * @desc: The CPUTLBDesc portion of the TLB
51
return val_le;
23
+ * @fast: The CPUTLBDescFast portion of the same TLB
52
}
24
*
53
25
* Called with tlb_lock_held.
54
-/**
26
*
55
- * do_st_bytes_leN:
27
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
56
- * @p: translation parameters
28
* high), since otherwise we are likely to have a significant amount of
57
- * @val_le: data to store
29
* conflict misses.
58
- *
59
- * Store @p->size bytes at @p->haddr, which is RAM.
60
- * The bytes to store are extracted in little-endian order from @val_le;
61
- * return the bytes of @val_le beyond @p->size that have not been stored.
62
- */
63
-static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le)
64
-{
65
- uint8_t *haddr = p->haddr;
66
- int i, size = p->size;
67
-
68
- for (i = 0; i < size; i++, val_le >>= 8) {
69
- haddr[i] = val_le;
70
- }
71
- return val_le;
72
-}
73
-
74
/*
75
* Wrapper for the above.
30
*/
76
*/
31
-static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
77
static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
32
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
78
- uint64_t val_le, int mmu_idx, uintptr_t ra)
79
+ uint64_t val_le, int mmu_idx,
80
+ MemOp mop, uintptr_t ra)
33
{
81
{
34
- CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
82
+ MemOp atom;
35
- size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
83
+ unsigned tmp, half_size;
36
+ size_t old_size = tlb_n_entries(fast);
84
+
37
size_t rate;
85
if (unlikely(p->flags & TLB_MMIO)) {
38
size_t new_size = old_size;
86
return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
39
int64_t now = get_clock_realtime();
87
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
40
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
88
return val_le >> (p->size * 8);
41
return;
89
- } else {
90
- return do_st_bytes_leN(p, val_le);
91
+ }
92
+
93
+ /*
94
+ * It is a given that we cross a page and therefore there is no atomicity
95
+ * for the store as a whole, but subobjects may need attention.
96
+ */
97
+ atom = mop & MO_ATOM_MASK;
98
+ switch (atom) {
99
+ case MO_ATOM_SUBALIGN:
100
+ return store_parts_leN(p->haddr, p->size, val_le);
101
+
102
+ case MO_ATOM_IFALIGN_PAIR:
103
+ case MO_ATOM_WITHIN16_PAIR:
104
+ tmp = mop & MO_SIZE;
105
+ tmp = tmp ? tmp - 1 : 0;
106
+ half_size = 1 << tmp;
107
+ if (atom == MO_ATOM_IFALIGN_PAIR
108
+ ? p->size == half_size
109
+ : p->size >= half_size) {
110
+ if (!HAVE_al8_fast && p->size <= 4) {
111
+ return store_whole_le4(p->haddr, p->size, val_le);
112
+ } else if (HAVE_al8) {
113
+ return store_whole_le8(p->haddr, p->size, val_le);
114
+ } else {
115
+ cpu_loop_exit_atomic(env_cpu(env), ra);
116
+ }
117
+ }
118
+ /* fall through */
119
+
120
+ case MO_ATOM_IFALIGN:
121
+ case MO_ATOM_WITHIN16:
122
+ case MO_ATOM_NONE:
123
+ return store_bytes_leN(p->haddr, p->size, val_le);
124
+
125
+ default:
126
+ g_assert_not_reached();
42
}
127
}
43
128
}
44
- g_free(env_tlb(env)->f[mmu_idx].table);
129
45
- g_free(env_tlb(env)->d[mmu_idx].iotlb);
130
@@ -XXX,XX +XXX,XX @@ static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
46
+ g_free(fast->table);
131
if (memop & MO_BSWAP) {
47
+ g_free(desc->iotlb);
132
val = bswap16(val);
48
49
tlb_window_reset(desc, now, 0);
50
/* desc->n_used_entries is cleared by the caller */
51
- env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
52
- env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
53
- env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
54
+ fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
55
+ fast->table = g_try_new(CPUTLBEntry, new_size);
56
+ desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
57
+
58
/*
59
* If the allocations fail, try smaller sizes. We just freed some
60
* memory, so going back to half of new_size has a good chance of working.
61
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
62
* allocations to fail though, so we progressively reduce the allocation
63
* size, aborting if we cannot even allocate the smallest TLB we support.
64
*/
65
- while (env_tlb(env)->f[mmu_idx].table == NULL ||
66
- env_tlb(env)->d[mmu_idx].iotlb == NULL) {
67
+ while (fast->table == NULL || desc->iotlb == NULL) {
68
if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
69
error_report("%s: %s", __func__, strerror(errno));
70
abort();
71
}
133
}
72
new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
134
- store_memop(p->haddr, val, MO_UW);
73
- env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
135
+ store_atom_2(env, ra, p->haddr, memop, val);
74
+ fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
75
76
- g_free(env_tlb(env)->f[mmu_idx].table);
77
- g_free(env_tlb(env)->d[mmu_idx].iotlb);
78
- env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
79
- env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
80
+ g_free(fast->table);
81
+ g_free(desc->iotlb);
82
+ fast->table = g_try_new(CPUTLBEntry, new_size);
83
+ desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
84
}
136
}
85
}
137
}
86
138
87
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
139
@@ -XXX,XX +XXX,XX @@ static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
88
{
140
if (memop & MO_BSWAP) {
89
- tlb_mmu_resize_locked(env, mmu_idx);
141
val = bswap32(val);
90
+ tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
142
}
91
env_tlb(env)->d[mmu_idx].n_used_entries = 0;
143
- store_memop(p->haddr, val, MO_UL);
92
env_tlb(env)->d[mmu_idx].large_page_addr = -1;
144
+ store_atom_4(env, ra, p->haddr, memop, val);
93
env_tlb(env)->d[mmu_idx].large_page_mask = -1;
145
}
146
}
147
148
@@ -XXX,XX +XXX,XX @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
149
if (memop & MO_BSWAP) {
150
val = bswap64(val);
151
}
152
- store_memop(p->haddr, val, MO_UQ);
153
+ store_atom_8(env, ra, p->haddr, memop, val);
154
}
155
}
156
157
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
158
if ((l.memop & MO_BSWAP) != MO_LE) {
159
val = bswap32(val);
160
}
161
- val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
162
- (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
163
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
164
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
165
}
166
167
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
168
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
169
if ((l.memop & MO_BSWAP) != MO_LE) {
170
val = bswap64(val);
171
}
172
- val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
173
- (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
174
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
175
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
176
}
177
178
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
179
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
180
index XXXXXXX..XXXXXXX 100644
181
--- a/accel/tcg/user-exec.c
182
+++ b/accel/tcg/user-exec.c
183
@@ -XXX,XX +XXX,XX @@ void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
184
185
validate_memop(oi, MO_BEUW);
186
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
187
- stw_be_p(haddr, val);
188
+ store_atom_2(env, ra, haddr, get_memop(oi), be16_to_cpu(val));
189
clear_helper_retaddr();
190
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
191
}
192
@@ -XXX,XX +XXX,XX @@ void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
193
194
validate_memop(oi, MO_BEUL);
195
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
196
- stl_be_p(haddr, val);
197
+ store_atom_4(env, ra, haddr, get_memop(oi), be32_to_cpu(val));
198
clear_helper_retaddr();
199
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
200
}
201
@@ -XXX,XX +XXX,XX @@ void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
202
203
validate_memop(oi, MO_BEUQ);
204
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
205
- stq_be_p(haddr, val);
206
+ store_atom_8(env, ra, haddr, get_memop(oi), be64_to_cpu(val));
207
clear_helper_retaddr();
208
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
209
}
210
@@ -XXX,XX +XXX,XX @@ void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
211
212
validate_memop(oi, MO_LEUW);
213
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
214
- stw_le_p(haddr, val);
215
+ store_atom_2(env, ra, haddr, get_memop(oi), le16_to_cpu(val));
216
clear_helper_retaddr();
217
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
218
}
219
@@ -XXX,XX +XXX,XX @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
220
221
validate_memop(oi, MO_LEUL);
222
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
223
- stl_le_p(haddr, val);
224
+ store_atom_4(env, ra, haddr, get_memop(oi), le32_to_cpu(val));
225
clear_helper_retaddr();
226
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
227
}
228
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
229
230
validate_memop(oi, MO_LEUQ);
231
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
232
- stq_le_p(haddr, val);
233
+ store_atom_8(env, ra, haddr, get_memop(oi), le64_to_cpu(val));
234
clear_helper_retaddr();
235
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
236
}
237
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
238
index XXXXXXX..XXXXXXX 100644
239
--- a/accel/tcg/ldst_atomicity.c.inc
240
+++ b/accel/tcg/ldst_atomicity.c.inc
241
@@ -XXX,XX +XXX,XX @@
242
#else
243
# define HAVE_al16_fast false
244
#endif
245
+#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
246
+# define HAVE_al16 true
247
+#else
248
+# define HAVE_al16 false
249
+#endif
250
+
251
252
/**
253
* required_atomicity:
254
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
255
g_assert_not_reached();
256
}
257
}
258
+
259
+/**
260
+ * store_atomic2:
261
+ * @pv: host address
262
+ * @val: value to store
263
+ *
264
+ * Atomically store 2 aligned bytes to @pv.
265
+ */
266
+static inline void store_atomic2(void *pv, uint16_t val)
267
+{
268
+ uint16_t *p = __builtin_assume_aligned(pv, 2);
269
+ qatomic_set(p, val);
270
+}
271
+
272
+/**
273
+ * store_atomic4:
274
+ * @pv: host address
275
+ * @val: value to store
276
+ *
277
+ * Atomically store 4 aligned bytes to @pv.
278
+ */
279
+static inline void store_atomic4(void *pv, uint32_t val)
280
+{
281
+ uint32_t *p = __builtin_assume_aligned(pv, 4);
282
+ qatomic_set(p, val);
283
+}
284
+
285
+/**
286
+ * store_atomic8:
287
+ * @pv: host address
288
+ * @val: value to store
289
+ *
290
+ * Atomically store 8 aligned bytes to @pv.
291
+ */
292
+static inline void store_atomic8(void *pv, uint64_t val)
293
+{
294
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
295
+
296
+ qemu_build_assert(HAVE_al8);
297
+ qatomic_set__nocheck(p, val);
298
+}
299
+
300
+/**
301
+ * store_atom_4_by_2
302
+ */
303
+static inline void store_atom_4_by_2(void *pv, uint32_t val)
304
+{
305
+ store_atomic2(pv, val >> (HOST_BIG_ENDIAN ? 16 : 0));
306
+ store_atomic2(pv + 2, val >> (HOST_BIG_ENDIAN ? 0 : 16));
307
+}
308
+
309
+/**
310
+ * store_atom_8_by_2
311
+ */
312
+static inline void store_atom_8_by_2(void *pv, uint64_t val)
313
+{
314
+ store_atom_4_by_2(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
315
+ store_atom_4_by_2(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
316
+}
317
+
318
+/**
319
+ * store_atom_8_by_4
320
+ */
321
+static inline void store_atom_8_by_4(void *pv, uint64_t val)
322
+{
323
+ store_atomic4(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
324
+ store_atomic4(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
325
+}
326
+
327
+/**
328
+ * store_atom_insert_al4:
329
+ * @p: host address
330
+ * @val: shifted value to store
331
+ * @msk: mask for value to store
332
+ *
333
+ * Atomically store @val to @p, masked by @msk.
334
+ */
335
+static void store_atom_insert_al4(uint32_t *p, uint32_t val, uint32_t msk)
336
+{
337
+ uint32_t old, new;
338
+
339
+ p = __builtin_assume_aligned(p, 4);
340
+ old = qatomic_read(p);
341
+ do {
342
+ new = (old & ~msk) | val;
343
+ } while (!__atomic_compare_exchange_n(p, &old, new, true,
344
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
345
+}
346
+
347
+/**
348
+ * store_atom_insert_al8:
349
+ * @p: host address
350
+ * @val: shifted value to store
351
+ * @msk: mask for value to store
352
+ *
353
+ * Atomically store @val to @p masked by @msk.
354
+ */
355
+static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
356
+{
357
+ uint64_t old, new;
358
+
359
+ qemu_build_assert(HAVE_al8);
360
+ p = __builtin_assume_aligned(p, 8);
361
+ old = qatomic_read__nocheck(p);
362
+ do {
363
+ new = (old & ~msk) | val;
364
+ } while (!__atomic_compare_exchange_n(p, &old, new, true,
365
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
366
+}
367
+
368
+/**
369
+ * store_atom_insert_al16:
370
+ * @ps: host address
371
+ * @val: shifted value to store
372
+ * @msk: mask for value to store
373
+ *
374
+ * Atomically store @val to @p masked by @msk.
375
+ */
376
+static void store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
377
+{
378
+#if defined(CONFIG_ATOMIC128)
379
+ __uint128_t *pu, old, new;
380
+
381
+ /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
382
+ pu = __builtin_assume_aligned(ps, 16);
383
+ old = *pu;
384
+ do {
385
+ new = (old & ~msk.u) | val.u;
386
+ } while (!__atomic_compare_exchange_n(pu, &old, new, true,
387
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
388
+#elif defined(CONFIG_CMPXCHG128)
389
+ __uint128_t *pu, old, new;
390
+
391
+ /*
392
+ * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
393
+ * defer to libatomic, so we must use __sync_*_compare_and_swap_16
394
+ * and accept the sequential consistency that comes with it.
395
+ */
396
+ pu = __builtin_assume_aligned(ps, 16);
397
+ do {
398
+ old = *pu;
399
+ new = (old & ~msk.u) | val.u;
400
+ } while (!__sync_bool_compare_and_swap_16(pu, old, new));
401
+#else
402
+ qemu_build_not_reached();
403
+#endif
404
+}
405
+
406
+/**
407
+ * store_bytes_leN:
408
+ * @pv: host address
409
+ * @size: number of bytes to store
410
+ * @val_le: data to store
411
+ *
412
+ * Store @size bytes at @p. The bytes to store are extracted in little-endian order
413
+ * from @val_le; return the bytes of @val_le beyond @size that have not been stored.
414
+ */
415
+static uint64_t store_bytes_leN(void *pv, int size, uint64_t val_le)
416
+{
417
+ uint8_t *p = pv;
418
+ for (int i = 0; i < size; i++, val_le >>= 8) {
419
+ p[i] = val_le;
420
+ }
421
+ return val_le;
422
+}
423
+
424
+/**
425
+ * store_parts_leN
426
+ * @pv: host address
427
+ * @size: number of bytes to store
428
+ * @val_le: data to store
429
+ *
430
+ * As store_bytes_leN, but atomically on each aligned part.
431
+ */
432
+G_GNUC_UNUSED
433
+static uint64_t store_parts_leN(void *pv, int size, uint64_t val_le)
434
+{
435
+ do {
436
+ int n;
437
+
438
+ /* Find minimum of alignment and size */
439
+ switch (((uintptr_t)pv | size) & 7) {
440
+ case 4:
441
+ store_atomic4(pv, le32_to_cpu(val_le));
442
+ val_le >>= 32;
443
+ n = 4;
444
+ break;
445
+ case 2:
446
+ case 6:
447
+ store_atomic2(pv, le16_to_cpu(val_le));
448
+ val_le >>= 16;
449
+ n = 2;
450
+ break;
451
+ default:
452
+ *(uint8_t *)pv = val_le;
453
+ val_le >>= 8;
454
+ n = 1;
455
+ break;
456
+ case 0:
457
+ g_assert_not_reached();
458
+ }
459
+ pv += n;
460
+ size -= n;
461
+ } while (size != 0);
462
+
463
+ return val_le;
464
+}
465
+
466
+/**
467
+ * store_whole_le4
468
+ * @pv: host address
469
+ * @size: number of bytes to store
470
+ * @val_le: data to store
471
+ *
472
+ * As store_bytes_leN, but atomically as a whole.
473
+ * Four aligned bytes are guaranteed to cover the store.
474
+ */
475
+static uint64_t store_whole_le4(void *pv, int size, uint64_t val_le)
476
+{
477
+ int sz = size * 8;
478
+ int o = (uintptr_t)pv & 3;
479
+ int sh = o * 8;
480
+ uint32_t m = MAKE_64BIT_MASK(0, sz);
481
+ uint32_t v;
482
+
483
+ if (HOST_BIG_ENDIAN) {
484
+ v = bswap32(val_le) >> sh;
485
+ m = bswap32(m) >> sh;
486
+ } else {
487
+ v = val_le << sh;
488
+ m <<= sh;
489
+ }
490
+ store_atom_insert_al4(pv - o, v, m);
491
+ return val_le >> sz;
492
+}
493
+
494
+/**
495
+ * store_whole_le8
496
+ * @pv: host address
497
+ * @size: number of bytes to store
498
+ * @val_le: data to store
499
+ *
500
+ * As store_bytes_leN, but atomically as a whole.
501
+ * Eight aligned bytes are guaranteed to cover the store.
502
+ */
503
+static uint64_t store_whole_le8(void *pv, int size, uint64_t val_le)
504
+{
505
+ int sz = size * 8;
506
+ int o = (uintptr_t)pv & 7;
507
+ int sh = o * 8;
508
+ uint64_t m = MAKE_64BIT_MASK(0, sz);
509
+ uint64_t v;
510
+
511
+ qemu_build_assert(HAVE_al8);
512
+ if (HOST_BIG_ENDIAN) {
513
+ v = bswap64(val_le) >> sh;
514
+ m = bswap64(m) >> sh;
515
+ } else {
516
+ v = val_le << sh;
517
+ m <<= sh;
518
+ }
519
+ store_atom_insert_al8(pv - o, v, m);
520
+ return val_le >> sz;
521
+}
522
+
523
+/**
524
+ * store_whole_le16
525
+ * @pv: host address
526
+ * @size: number of bytes to store
527
+ * @val_le: data to store
528
+ *
529
+ * As store_bytes_leN, but atomically as a whole.
530
+ * 16 aligned bytes are guaranteed to cover the store.
531
+ */
532
+static uint64_t store_whole_le16(void *pv, int size, Int128 val_le)
533
+{
534
+ int sz = size * 8;
535
+ int o = (uintptr_t)pv & 15;
536
+ int sh = o * 8;
537
+ Int128 m, v;
538
+
539
+ qemu_build_assert(HAVE_al16);
540
+
541
+ /* Like MAKE_64BIT_MASK(0, sz), but larger. */
542
+ if (sz <= 64) {
543
+ m = int128_make64(MAKE_64BIT_MASK(0, sz));
544
+ } else {
545
+ m = int128_make128(-1, MAKE_64BIT_MASK(0, sz - 64));
546
+ }
547
+
548
+ if (HOST_BIG_ENDIAN) {
549
+ v = int128_urshift(bswap128(val_le), sh);
550
+ m = int128_urshift(bswap128(m), sh);
551
+ } else {
552
+ v = int128_lshift(val_le, sh);
553
+ m = int128_lshift(m, sh);
554
+ }
555
+ store_atom_insert_al16(pv - o, v, m);
556
+
557
+ /* Unused if sz <= 64. */
558
+ return int128_gethi(val_le) >> (sz - 64);
559
+}
560
+
561
+/**
562
+ * store_atom_2:
563
+ * @p: host address
564
+ * @val: the value to store
565
+ * @memop: the full memory op
566
+ *
567
+ * Store 2 bytes to @p, honoring the atomicity of @memop.
568
+ */
569
+static void store_atom_2(CPUArchState *env, uintptr_t ra,
570
+ void *pv, MemOp memop, uint16_t val)
571
+{
572
+ uintptr_t pi = (uintptr_t)pv;
573
+ int atmax;
574
+
575
+ if (likely((pi & 1) == 0)) {
576
+ store_atomic2(pv, val);
577
+ return;
578
+ }
579
+
580
+ atmax = required_atomicity(env, pi, memop);
581
+ if (atmax == MO_8) {
582
+ stw_he_p(pv, val);
583
+ return;
584
+ }
585
+
586
+ /*
587
+ * The only case remaining is MO_ATOM_WITHIN16.
588
+ * Big or little endian, we want the middle two bytes in each test.
589
+ */
590
+ if ((pi & 3) == 1) {
591
+ store_atom_insert_al4(pv - 1, (uint32_t)val << 8, MAKE_64BIT_MASK(8, 16));
592
+ return;
593
+ } else if ((pi & 7) == 3) {
594
+ if (HAVE_al8) {
595
+ store_atom_insert_al8(pv - 3, (uint64_t)val << 24, MAKE_64BIT_MASK(24, 16));
596
+ return;
597
+ }
598
+ } else if ((pi & 15) == 7) {
599
+ if (HAVE_al16) {
600
+ Int128 v = int128_lshift(int128_make64(val), 56);
601
+ Int128 m = int128_lshift(int128_make64(0xffff), 56);
602
+ store_atom_insert_al16(pv - 7, v, m);
603
+ return;
604
+ }
605
+ } else {
606
+ g_assert_not_reached();
607
+ }
608
+
609
+ cpu_loop_exit_atomic(env_cpu(env), ra);
610
+}
611
+
612
+/**
613
+ * store_atom_4:
614
+ * @p: host address
615
+ * @val: the value to store
616
+ * @memop: the full memory op
617
+ *
618
+ * Store 4 bytes to @p, honoring the atomicity of @memop.
619
+ */
620
+static void store_atom_4(CPUArchState *env, uintptr_t ra,
621
+ void *pv, MemOp memop, uint32_t val)
622
+{
623
+ uintptr_t pi = (uintptr_t)pv;
624
+ int atmax;
625
+
626
+ if (likely((pi & 3) == 0)) {
627
+ store_atomic4(pv, val);
628
+ return;
629
+ }
630
+
631
+ atmax = required_atomicity(env, pi, memop);
632
+ switch (atmax) {
633
+ case MO_8:
634
+ stl_he_p(pv, val);
635
+ return;
636
+ case MO_16:
637
+ store_atom_4_by_2(pv, val);
638
+ return;
639
+ case -MO_16:
640
+ {
641
+ uint32_t val_le = cpu_to_le32(val);
642
+ int s2 = pi & 3;
643
+ int s1 = 4 - s2;
644
+
645
+ switch (s2) {
646
+ case 1:
647
+ val_le = store_whole_le4(pv, s1, val_le);
648
+ *(uint8_t *)(pv + 3) = val_le;
649
+ break;
650
+ case 3:
651
+ *(uint8_t *)pv = val_le;
652
+ store_whole_le4(pv + 1, s2, val_le >> 8);
653
+ break;
654
+ case 0: /* aligned */
655
+ case 2: /* atmax MO_16 */
656
+ default:
657
+ g_assert_not_reached();
658
+ }
659
+ }
660
+ return;
661
+ case MO_32:
662
+ if ((pi & 7) < 4) {
663
+ if (HAVE_al8) {
664
+ store_whole_le8(pv, 4, cpu_to_le32(val));
665
+ return;
666
+ }
667
+ } else {
668
+ if (HAVE_al16) {
669
+ store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
670
+ return;
671
+ }
672
+ }
673
+ cpu_loop_exit_atomic(env_cpu(env), ra);
674
+ default:
675
+ g_assert_not_reached();
676
+ }
677
+}
678
+
679
+/**
680
+ * store_atom_8:
681
+ * @p: host address
682
+ * @val: the value to store
683
+ * @memop: the full memory op
684
+ *
685
+ * Store 8 bytes to @p, honoring the atomicity of @memop.
686
+ */
687
+static void store_atom_8(CPUArchState *env, uintptr_t ra,
688
+ void *pv, MemOp memop, uint64_t val)
689
+{
690
+ uintptr_t pi = (uintptr_t)pv;
691
+ int atmax;
692
+
693
+ if (HAVE_al8 && likely((pi & 7) == 0)) {
694
+ store_atomic8(pv, val);
695
+ return;
696
+ }
697
+
698
+ atmax = required_atomicity(env, pi, memop);
699
+ switch (atmax) {
700
+ case MO_8:
701
+ stq_he_p(pv, val);
702
+ return;
703
+ case MO_16:
704
+ store_atom_8_by_2(pv, val);
705
+ return;
706
+ case MO_32:
707
+ store_atom_8_by_4(pv, val);
708
+ return;
709
+ case -MO_32:
710
+ if (HAVE_al8) {
711
+ uint64_t val_le = cpu_to_le64(val);
712
+ int s2 = pi & 7;
713
+ int s1 = 8 - s2;
714
+
715
+ switch (s2) {
716
+ case 1 ... 3:
717
+ val_le = store_whole_le8(pv, s1, val_le);
718
+ store_bytes_leN(pv + s1, s2, val_le);
719
+ break;
720
+ case 5 ... 7:
721
+ val_le = store_bytes_leN(pv, s1, val_le);
722
+ store_whole_le8(pv + s1, s2, val_le);
723
+ break;
724
+ case 0: /* aligned */
725
+ case 4: /* atmax MO_32 */
726
+ default:
727
+ g_assert_not_reached();
728
+ }
729
+ return;
730
+ }
731
+ break;
732
+ case MO_64:
733
+ if (HAVE_al16) {
734
+ store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
735
+ return;
736
+ }
737
+ break;
738
+ default:
739
+ g_assert_not_reached();
740
+ }
741
+ cpu_loop_exit_atomic(env_cpu(env), ra);
742
+}
94
--
743
--
95
2.20.1
744
2.34.1
96
97
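
As a standalone sketch of the technique the store_atom_insert_al4/al8 and store_whole_le* routines above rely on: a misaligned sub-word store can still be made single-copy atomic by shifting the value and a byte mask into position within the covering aligned word, then applying them with a compare-and-swap loop. The code below is illustrative only (insert_al8 and the offsets are made-up names using the GCC-style atomic builtins), not part of the series.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Replace the bytes selected by @msk inside the aligned word at @p,
 * leaving the other bytes untouched (the store_atom_insert_al8 idea). */
static void insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
{
    uint64_t old = __atomic_load_n(p, __ATOMIC_RELAXED);
    uint64_t new;

    do {
        new = (old & ~msk) | (val & msk);
    } while (!__atomic_compare_exchange_n(p, &old, new, true,
                                          __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}

int main(void)
{
    uint64_t word = 0x1122334455667788ull;
    int sh = 2 * 8;                          /* a 2-byte store at bit offset 16 */
    uint64_t val = 0xaabbull << sh;
    uint64_t msk = 0xffffull << sh;

    insert_al8(&word, val, msk);
    printf("%016" PRIx64 "\n", word);        /* 11223344aabb7788 */
    return 0;
}

Concurrent readers of the whole word observe either the old or the new contents, never a mixture, which is the property the cross-boundary cases in store_atom_2/4/8 need.
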
1
There is only one caller for tlb_table_flush_by_mmuidx. Place
1
With the current structure of cputlb.c, there is no difference
2
the result at the earlier line number, due to an expected user
2
between the little-endian and big-endian entry points, aside
3
in the near future.
3
from the assert. Unify the pairs of functions.
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Hoist the qemu_{ld,st}_helpers arrays to tcg.c.
6
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
10
---
9
accel/tcg/cputlb.c | 19 +++++++------------
11
docs/devel/loads-stores.rst | 36 ++----
10
1 file changed, 7 insertions(+), 12 deletions(-)
12
include/tcg/tcg-ldst.h | 60 ++++------
13
accel/tcg/cputlb.c | 190 ++++++++++---------------------
14
tcg/tcg.c | 21 ++++
15
tcg/tci.c | 61 ++++------
16
tcg/aarch64/tcg-target.c.inc | 33 ------
17
tcg/arm/tcg-target.c.inc | 37 ------
18
tcg/i386/tcg-target.c.inc | 30 +----
19
tcg/loongarch64/tcg-target.c.inc | 23 ----
20
tcg/mips/tcg-target.c.inc | 31 -----
21
tcg/ppc/tcg-target.c.inc | 30 +----
22
tcg/riscv/tcg-target.c.inc | 42 -------
23
tcg/s390x/tcg-target.c.inc | 31 +----
24
tcg/sparc64/tcg-target.c.inc | 32 +-----
25
14 files changed, 146 insertions(+), 511 deletions(-)
11
26
27
diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
28
index XXXXXXX..XXXXXXX 100644
29
--- a/docs/devel/loads-stores.rst
30
+++ b/docs/devel/loads-stores.rst
31
@@ -XXX,XX +XXX,XX @@ swap: ``translator_ld{sign}{size}_swap(env, ptr, swap)``
32
Regexes for git grep
33
- ``\<translator_ld[us]\?[bwlq]\(_swap\)\?\>``
34
35
-``helper_*_{ld,st}*_mmu``
36
+``helper_{ld,st}*_mmu``
37
~~~~~~~~~~~~~~~~~~~~~~~~~
38
39
These functions are intended primarily to be called by the code
40
-generated by the TCG backend. They may also be called by target
41
-CPU helper function code. Like the ``cpu_{ld,st}_mmuidx_ra`` functions
42
-they perform accesses by guest virtual address, with a given ``mmuidx``.
43
+generated by the TCG backend. Like the ``cpu_{ld,st}_mmu`` functions
44
+they perform accesses by guest virtual address, with a given ``MemOpIdx``.
45
46
-These functions specify an ``opindex`` parameter which encodes
47
-(among other things) the mmu index to use for the access. This parameter
48
-should be created by calling ``make_memop_idx()``.
49
+They differ from ``cpu_{ld,st}_mmu`` in that they take the endianness
50
+of the operation only from the MemOpIdx, and loads extend the return
51
+value to the size of a host general register (``tcg_target_ulong``).
52
53
-The ``retaddr`` parameter should be the result of GETPC() called directly
54
-from the top level HELPER(foo) function (or 0 if no guest CPU state
55
-unwinding is required).
56
+load: ``helper_ld{sign}{size}_mmu(env, addr, opindex, retaddr)``
57
58
-**TODO** The names of these functions are a bit odd for historical
59
-reasons because they were originally expected to be called only from
60
-within generated code. We should rename them to bring them more in
61
-line with the other memory access functions. The explicit endianness
62
-is the only feature they have beyond ``*_mmuidx_ra``.
63
-
64
-load: ``helper_{endian}_ld{sign}{size}_mmu(env, addr, opindex, retaddr)``
65
-
66
-store: ``helper_{endian}_st{size}_mmu(env, addr, val, opindex, retaddr)``
67
+store: ``helper_st{size}_mmu(env, addr, val, opindex, retaddr)``
68
69
``sign``
70
- (empty) : for 32 or 64 bit sizes
71
@@ -XXX,XX +XXX,XX @@ store: ``helper_{endian}_st{size}_mmu(env, addr, val, opindex, retaddr)``
72
- ``l`` : 32 bits
73
- ``q`` : 64 bits
74
75
-``endian``
76
- - ``le`` : little endian
77
- - ``be`` : big endian
78
- - ``ret`` : target endianness
79
-
80
Regexes for git grep
81
- - ``\<helper_\(le\|be\|ret\)_ld[us]\?[bwlq]_mmu\>``
82
- - ``\<helper_\(le\|be\|ret\)_st[bwlq]_mmu\>``
83
+ - ``\<helper_ld[us]\?[bwlq]_mmu\>``
84
+ - ``\<helper_st[bwlq]_mmu\>``
85
86
``address_space_*``
87
~~~~~~~~~~~~~~~~~~~
88
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
89
index XXXXXXX..XXXXXXX 100644
90
--- a/include/tcg/tcg-ldst.h
91
+++ b/include/tcg/tcg-ldst.h
92
@@ -XXX,XX +XXX,XX @@
93
#ifdef CONFIG_SOFTMMU
94
95
/* Value zero-extended to tcg register size. */
96
-tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
97
- MemOpIdx oi, uintptr_t retaddr);
98
-tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
99
- MemOpIdx oi, uintptr_t retaddr);
100
-tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
101
- MemOpIdx oi, uintptr_t retaddr);
102
-uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
103
- MemOpIdx oi, uintptr_t retaddr);
104
-tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
105
- MemOpIdx oi, uintptr_t retaddr);
106
-tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
107
- MemOpIdx oi, uintptr_t retaddr);
108
-uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
109
- MemOpIdx oi, uintptr_t retaddr);
110
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
111
+ MemOpIdx oi, uintptr_t retaddr);
112
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
113
+ MemOpIdx oi, uintptr_t retaddr);
114
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
115
+ MemOpIdx oi, uintptr_t retaddr);
116
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
117
+ MemOpIdx oi, uintptr_t retaddr);
118
119
/* Value sign-extended to tcg register size. */
120
-tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
121
- MemOpIdx oi, uintptr_t retaddr);
122
-tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
123
- MemOpIdx oi, uintptr_t retaddr);
124
-tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
125
- MemOpIdx oi, uintptr_t retaddr);
126
-tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
127
- MemOpIdx oi, uintptr_t retaddr);
128
-tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
129
- MemOpIdx oi, uintptr_t retaddr);
130
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
131
+ MemOpIdx oi, uintptr_t retaddr);
132
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
133
+ MemOpIdx oi, uintptr_t retaddr);
134
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
135
+ MemOpIdx oi, uintptr_t retaddr);
136
137
/*
138
* Value extended to at least uint32_t, so that some ABIs do not require
139
* zero-extension from uint8_t or uint16_t.
140
*/
141
-void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
142
- MemOpIdx oi, uintptr_t retaddr);
143
-void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
144
- MemOpIdx oi, uintptr_t retaddr);
145
-void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
146
- MemOpIdx oi, uintptr_t retaddr);
147
-void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
148
- MemOpIdx oi, uintptr_t retaddr);
149
-void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
150
- MemOpIdx oi, uintptr_t retaddr);
151
-void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
152
- MemOpIdx oi, uintptr_t retaddr);
153
-void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
154
- MemOpIdx oi, uintptr_t retaddr);
155
+void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
156
+ MemOpIdx oi, uintptr_t retaddr);
157
+void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
158
+ MemOpIdx oi, uintptr_t retaddr);
159
+void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
160
+ MemOpIdx oi, uintptr_t retaddr);
161
+void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
162
+ MemOpIdx oi, uintptr_t retaddr);
163
164
#else
165
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
166
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
13
index XXXXXXX..XXXXXXX 100644
167
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
168
--- a/accel/tcg/cputlb.c
15
+++ b/accel/tcg/cputlb.c
169
+++ b/accel/tcg/cputlb.c
16
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
170
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
171
cpu_loop_exit_atomic(env_cpu(env), retaddr);
172
}
173
174
-/*
175
- * Verify that we have passed the correct MemOp to the correct function.
176
- *
177
- * In the case of the helper_*_mmu functions, we will have done this by
178
- * using the MemOp to look up the helper during code generation.
179
- *
180
- * In the case of the cpu_*_mmu functions, this is up to the caller.
181
- * We could present one function to target code, and dispatch based on
182
- * the MemOp, but so far we have worked hard to avoid an indirect function
183
- * call along the memory path.
184
- */
185
-static void validate_memop(MemOpIdx oi, MemOp expected)
186
-{
187
-#ifdef CONFIG_DEBUG_TCG
188
- MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP);
189
- assert(have == expected);
190
-#endif
191
-}
192
-
193
/*
194
* Load Helpers
195
*
196
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
197
return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
198
}
199
200
-tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
201
- MemOpIdx oi, uintptr_t retaddr)
202
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
203
+ MemOpIdx oi, uintptr_t retaddr)
204
{
205
- validate_memop(oi, MO_UB);
206
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
207
return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
208
}
209
210
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
211
return ret;
212
}
213
214
-tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
215
- MemOpIdx oi, uintptr_t retaddr)
216
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
217
+ MemOpIdx oi, uintptr_t retaddr)
218
{
219
- validate_memop(oi, MO_LEUW);
220
- return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
221
-}
222
-
223
-tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
224
- MemOpIdx oi, uintptr_t retaddr)
225
-{
226
- validate_memop(oi, MO_BEUW);
227
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
228
return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
229
}
230
231
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
232
return ret;
233
}
234
235
-tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
236
- MemOpIdx oi, uintptr_t retaddr)
237
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
238
+ MemOpIdx oi, uintptr_t retaddr)
239
{
240
- validate_memop(oi, MO_LEUL);
241
- return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
242
-}
243
-
244
-tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
245
- MemOpIdx oi, uintptr_t retaddr)
246
-{
247
- validate_memop(oi, MO_BEUL);
248
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
249
return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
250
}
251
252
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
253
return ret;
254
}
255
256
-uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
257
- MemOpIdx oi, uintptr_t retaddr)
258
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
259
+ MemOpIdx oi, uintptr_t retaddr)
260
{
261
- validate_memop(oi, MO_LEUQ);
262
- return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
263
-}
264
-
265
-uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
266
- MemOpIdx oi, uintptr_t retaddr)
267
-{
268
- validate_memop(oi, MO_BEUQ);
269
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
270
return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
271
}
272
273
@@ -XXX,XX +XXX,XX @@ uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
274
* avoid this for 64-bit data, or for 32-bit data on 32-bit host.
275
*/
276
277
-
278
-tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
279
- MemOpIdx oi, uintptr_t retaddr)
280
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
281
+ MemOpIdx oi, uintptr_t retaddr)
282
{
283
- return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
284
+ return (int8_t)helper_ldub_mmu(env, addr, oi, retaddr);
285
}
286
287
-tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
288
- MemOpIdx oi, uintptr_t retaddr)
289
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
290
+ MemOpIdx oi, uintptr_t retaddr)
291
{
292
- return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
293
+ return (int16_t)helper_lduw_mmu(env, addr, oi, retaddr);
294
}
295
296
-tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
297
- MemOpIdx oi, uintptr_t retaddr)
298
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
299
+ MemOpIdx oi, uintptr_t retaddr)
300
{
301
- return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
302
-}
303
-
304
-tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
305
- MemOpIdx oi, uintptr_t retaddr)
306
-{
307
- return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
308
-}
309
-
310
-tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
311
- MemOpIdx oi, uintptr_t retaddr)
312
-{
313
- return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
314
+ return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
315
}
316
317
/*
318
@@ -XXX,XX +XXX,XX @@ uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
319
{
320
uint8_t ret;
321
322
- validate_memop(oi, MO_UB);
323
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_UB);
324
ret = do_ld1_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
325
plugin_load_cb(env, addr, oi);
326
return ret;
327
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
328
{
329
uint16_t ret;
330
331
- validate_memop(oi, MO_BEUW);
332
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW);
333
ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
334
plugin_load_cb(env, addr, oi);
335
return ret;
336
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
337
{
338
uint32_t ret;
339
340
- validate_memop(oi, MO_BEUL);
341
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL);
342
ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
343
plugin_load_cb(env, addr, oi);
344
return ret;
345
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
346
{
347
uint64_t ret;
348
349
- validate_memop(oi, MO_BEUQ);
350
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ);
351
ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
352
plugin_load_cb(env, addr, oi);
353
return ret;
354
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
355
{
356
uint16_t ret;
357
358
- validate_memop(oi, MO_LEUW);
359
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
360
ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
361
plugin_load_cb(env, addr, oi);
362
return ret;
363
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
364
{
365
uint32_t ret;
366
367
- validate_memop(oi, MO_LEUL);
368
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
369
ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
370
plugin_load_cb(env, addr, oi);
371
return ret;
372
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
373
{
374
uint64_t ret;
375
376
- validate_memop(oi, MO_LEUQ);
377
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
378
ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
379
plugin_load_cb(env, addr, oi);
380
return ret;
381
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
382
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
383
new_oi = make_memop_idx(mop, mmu_idx);
384
385
- h = helper_be_ldq_mmu(env, addr, new_oi, ra);
386
- l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra);
387
+ h = helper_ldq_mmu(env, addr, new_oi, ra);
388
+ l = helper_ldq_mmu(env, addr + 8, new_oi, ra);
389
390
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
391
return int128_make128(l, h);
392
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
393
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
394
new_oi = make_memop_idx(mop, mmu_idx);
395
396
- l = helper_le_ldq_mmu(env, addr, new_oi, ra);
397
- h = helper_le_ldq_mmu(env, addr + 8, new_oi, ra);
398
+ l = helper_ldq_mmu(env, addr, new_oi, ra);
399
+ h = helper_ldq_mmu(env, addr + 8, new_oi, ra);
400
401
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
402
return int128_make128(l, h);
403
@@ -XXX,XX +XXX,XX @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
17
}
404
}
18
}
405
}
19
406
20
-static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
407
-void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
21
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
408
- MemOpIdx oi, uintptr_t ra)
22
{
409
+void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
23
tlb_mmu_resize_locked(env, mmu_idx);
410
+ MemOpIdx oi, uintptr_t ra)
24
- memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
411
{
25
env_tlb(env)->d[mmu_idx].n_used_entries = 0;
412
MMULookupLocals l;
26
+ env_tlb(env)->d[mmu_idx].large_page_addr = -1;
413
bool crosspage;
27
+ env_tlb(env)->d[mmu_idx].large_page_mask = -1;
414
28
+ env_tlb(env)->d[mmu_idx].vindex = 0;
415
- validate_memop(oi, MO_UB);
29
+ memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
416
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
30
+ memset(env_tlb(env)->d[mmu_idx].vtable, -1,
417
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
31
+ sizeof(env_tlb(env)->d[0].vtable));
418
tcg_debug_assert(!crosspage);
32
}
419
33
420
@@ -XXX,XX +XXX,XX @@ static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
34
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
421
do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
35
@@ -XXX,XX +XXX,XX @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
422
}
36
*pelide = elide;
423
37
}
424
-void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
38
425
- MemOpIdx oi, uintptr_t retaddr)
39
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
426
+void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
427
+ MemOpIdx oi, uintptr_t retaddr)
428
{
429
- validate_memop(oi, MO_LEUW);
430
- do_st2_mmu(env, addr, val, oi, retaddr);
431
-}
432
-
433
-void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
434
- MemOpIdx oi, uintptr_t retaddr)
40
-{
435
-{
41
- tlb_table_flush_by_mmuidx(env, mmu_idx);
436
- validate_memop(oi, MO_BEUW);
42
- env_tlb(env)->d[mmu_idx].large_page_addr = -1;
437
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
43
- env_tlb(env)->d[mmu_idx].large_page_mask = -1;
438
do_st2_mmu(env, addr, val, oi, retaddr);
44
- env_tlb(env)->d[mmu_idx].vindex = 0;
439
}
45
- memset(env_tlb(env)->d[mmu_idx].vtable, -1,
440
46
- sizeof(env_tlb(env)->d[0].vtable));
441
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
442
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
443
}
444
445
-void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
446
- MemOpIdx oi, uintptr_t retaddr)
447
+void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
448
+ MemOpIdx oi, uintptr_t retaddr)
449
{
450
- validate_memop(oi, MO_LEUL);
451
- do_st4_mmu(env, addr, val, oi, retaddr);
47
-}
452
-}
48
-
453
-
49
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
454
-void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
50
{
455
- MemOpIdx oi, uintptr_t retaddr)
51
CPUArchState *env = cpu->env_ptr;
456
-{
457
- validate_memop(oi, MO_BEUL);
458
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
459
do_st4_mmu(env, addr, val, oi, retaddr);
460
}
461
462
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
463
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
464
}
465
466
-void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
467
- MemOpIdx oi, uintptr_t retaddr)
468
+void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
469
+ MemOpIdx oi, uintptr_t retaddr)
470
{
471
- validate_memop(oi, MO_LEUQ);
472
- do_st8_mmu(env, addr, val, oi, retaddr);
473
-}
474
-
475
-void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
476
- MemOpIdx oi, uintptr_t retaddr)
477
-{
478
- validate_memop(oi, MO_BEUQ);
479
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
480
do_st8_mmu(env, addr, val, oi, retaddr);
481
}
482
483
@@ -XXX,XX +XXX,XX @@ static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
484
void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
485
MemOpIdx oi, uintptr_t retaddr)
486
{
487
- helper_ret_stb_mmu(env, addr, val, oi, retaddr);
488
+ helper_stb_mmu(env, addr, val, oi, retaddr);
489
plugin_store_cb(env, addr, oi);
490
}
491
492
void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
493
MemOpIdx oi, uintptr_t retaddr)
494
{
495
- helper_be_stw_mmu(env, addr, val, oi, retaddr);
496
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW);
497
+ do_st2_mmu(env, addr, val, oi, retaddr);
498
plugin_store_cb(env, addr, oi);
499
}
500
501
void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
502
MemOpIdx oi, uintptr_t retaddr)
503
{
504
- helper_be_stl_mmu(env, addr, val, oi, retaddr);
505
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL);
506
+ do_st4_mmu(env, addr, val, oi, retaddr);
507
plugin_store_cb(env, addr, oi);
508
}
509
510
void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
511
MemOpIdx oi, uintptr_t retaddr)
512
{
513
- helper_be_stq_mmu(env, addr, val, oi, retaddr);
514
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ);
515
+ do_st8_mmu(env, addr, val, oi, retaddr);
516
plugin_store_cb(env, addr, oi);
517
}
518
519
void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
520
MemOpIdx oi, uintptr_t retaddr)
521
{
522
- helper_le_stw_mmu(env, addr, val, oi, retaddr);
523
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
524
+ do_st2_mmu(env, addr, val, oi, retaddr);
525
plugin_store_cb(env, addr, oi);
526
}
527
528
void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
529
MemOpIdx oi, uintptr_t retaddr)
530
{
531
- helper_le_stl_mmu(env, addr, val, oi, retaddr);
532
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
533
+ do_st4_mmu(env, addr, val, oi, retaddr);
534
plugin_store_cb(env, addr, oi);
535
}
536
537
void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
538
MemOpIdx oi, uintptr_t retaddr)
539
{
540
- helper_le_stq_mmu(env, addr, val, oi, retaddr);
541
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
542
+ do_st8_mmu(env, addr, val, oi, retaddr);
543
plugin_store_cb(env, addr, oi);
544
}
545
546
@@ -XXX,XX +XXX,XX @@ void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
547
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
548
new_oi = make_memop_idx(mop, mmu_idx);
549
550
- helper_be_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
551
- helper_be_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
552
+ helper_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
553
+ helper_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
554
555
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
556
}
557
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
558
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
559
new_oi = make_memop_idx(mop, mmu_idx);
560
561
- helper_le_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
562
- helper_le_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
563
+ helper_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
564
+ helper_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
565
566
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
567
}
568
diff --git a/tcg/tcg.c b/tcg/tcg.c
569
index XXXXXXX..XXXXXXX 100644
570
--- a/tcg/tcg.c
571
+++ b/tcg/tcg.c
572
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
573
const TCGLdstHelperParam *p)
574
__attribute__((unused));
575
576
+#ifdef CONFIG_SOFTMMU
577
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
578
+ [MO_UB] = helper_ldub_mmu,
579
+ [MO_SB] = helper_ldsb_mmu,
580
+ [MO_UW] = helper_lduw_mmu,
581
+ [MO_SW] = helper_ldsw_mmu,
582
+ [MO_UL] = helper_ldul_mmu,
583
+ [MO_UQ] = helper_ldq_mmu,
584
+#if TCG_TARGET_REG_BITS == 64
585
+ [MO_SL] = helper_ldsl_mmu,
586
+#endif
587
+};
588
+
589
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
590
+ [MO_8] = helper_stb_mmu,
591
+ [MO_16] = helper_stw_mmu,
592
+ [MO_32] = helper_stl_mmu,
593
+ [MO_64] = helper_stq_mmu,
594
+};
595
+#endif
596
+
597
TCGContext tcg_init_ctx;
598
__thread TCGContext *tcg_ctx;
599
600
diff --git a/tcg/tci.c b/tcg/tci.c
601
index XXXXXXX..XXXXXXX 100644
602
--- a/tcg/tci.c
603
+++ b/tcg/tci.c
604
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
605
uintptr_t ra = (uintptr_t)tb_ptr;
606
607
#ifdef CONFIG_SOFTMMU
608
- switch (mop & (MO_BSWAP | MO_SSIZE)) {
609
+ switch (mop & MO_SSIZE) {
610
case MO_UB:
611
- return helper_ret_ldub_mmu(env, taddr, oi, ra);
612
+ return helper_ldub_mmu(env, taddr, oi, ra);
613
case MO_SB:
614
- return helper_ret_ldsb_mmu(env, taddr, oi, ra);
615
- case MO_LEUW:
616
- return helper_le_lduw_mmu(env, taddr, oi, ra);
617
- case MO_LESW:
618
- return helper_le_ldsw_mmu(env, taddr, oi, ra);
619
- case MO_LEUL:
620
- return helper_le_ldul_mmu(env, taddr, oi, ra);
621
- case MO_LESL:
622
- return helper_le_ldsl_mmu(env, taddr, oi, ra);
623
- case MO_LEUQ:
624
- return helper_le_ldq_mmu(env, taddr, oi, ra);
625
- case MO_BEUW:
626
- return helper_be_lduw_mmu(env, taddr, oi, ra);
627
- case MO_BESW:
628
- return helper_be_ldsw_mmu(env, taddr, oi, ra);
629
- case MO_BEUL:
630
- return helper_be_ldul_mmu(env, taddr, oi, ra);
631
- case MO_BESL:
632
- return helper_be_ldsl_mmu(env, taddr, oi, ra);
633
- case MO_BEUQ:
634
- return helper_be_ldq_mmu(env, taddr, oi, ra);
635
+ return helper_ldsb_mmu(env, taddr, oi, ra);
636
+ case MO_UW:
637
+ return helper_lduw_mmu(env, taddr, oi, ra);
638
+ case MO_SW:
639
+ return helper_ldsw_mmu(env, taddr, oi, ra);
640
+ case MO_UL:
641
+ return helper_ldul_mmu(env, taddr, oi, ra);
642
+ case MO_SL:
643
+ return helper_ldsl_mmu(env, taddr, oi, ra);
644
+ case MO_UQ:
645
+ return helper_ldq_mmu(env, taddr, oi, ra);
646
default:
647
g_assert_not_reached();
648
}
649
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
650
uintptr_t ra = (uintptr_t)tb_ptr;
651
652
#ifdef CONFIG_SOFTMMU
653
- switch (mop & (MO_BSWAP | MO_SIZE)) {
654
+ switch (mop & MO_SIZE) {
655
case MO_UB:
656
- helper_ret_stb_mmu(env, taddr, val, oi, ra);
657
+ helper_stb_mmu(env, taddr, val, oi, ra);
658
break;
659
- case MO_LEUW:
660
- helper_le_stw_mmu(env, taddr, val, oi, ra);
661
+ case MO_UW:
662
+ helper_stw_mmu(env, taddr, val, oi, ra);
663
break;
664
- case MO_LEUL:
665
- helper_le_stl_mmu(env, taddr, val, oi, ra);
666
+ case MO_UL:
667
+ helper_stl_mmu(env, taddr, val, oi, ra);
668
break;
669
- case MO_LEUQ:
670
- helper_le_stq_mmu(env, taddr, val, oi, ra);
671
- break;
672
- case MO_BEUW:
673
- helper_be_stw_mmu(env, taddr, val, oi, ra);
674
- break;
675
- case MO_BEUL:
676
- helper_be_stl_mmu(env, taddr, val, oi, ra);
677
- break;
678
- case MO_BEUQ:
679
- helper_be_stq_mmu(env, taddr, val, oi, ra);
680
+ case MO_UQ:
681
+ helper_stq_mmu(env, taddr, val, oi, ra);
682
break;
683
default:
684
g_assert_not_reached();
685
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
686
index XXXXXXX..XXXXXXX 100644
687
--- a/tcg/aarch64/tcg-target.c.inc
688
+++ b/tcg/aarch64/tcg-target.c.inc
689
@@ -XXX,XX +XXX,XX @@ typedef struct {
690
} HostAddress;
691
692
#ifdef CONFIG_SOFTMMU
693
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
694
- * MemOpIdx oi, uintptr_t ra)
695
- */
696
-static void * const qemu_ld_helpers[MO_SIZE + 1] = {
697
- [MO_8] = helper_ret_ldub_mmu,
698
-#if HOST_BIG_ENDIAN
699
- [MO_16] = helper_be_lduw_mmu,
700
- [MO_32] = helper_be_ldul_mmu,
701
- [MO_64] = helper_be_ldq_mmu,
702
-#else
703
- [MO_16] = helper_le_lduw_mmu,
704
- [MO_32] = helper_le_ldul_mmu,
705
- [MO_64] = helper_le_ldq_mmu,
706
-#endif
707
-};
708
-
709
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
710
- * uintxx_t val, MemOpIdx oi,
711
- * uintptr_t ra)
712
- */
713
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
714
- [MO_8] = helper_ret_stb_mmu,
715
-#if HOST_BIG_ENDIAN
716
- [MO_16] = helper_be_stw_mmu,
717
- [MO_32] = helper_be_stl_mmu,
718
- [MO_64] = helper_be_stq_mmu,
719
-#else
720
- [MO_16] = helper_le_stw_mmu,
721
- [MO_32] = helper_le_stl_mmu,
722
- [MO_64] = helper_le_stq_mmu,
723
-#endif
724
-};
725
-
726
static const TCGLdstHelperParam ldst_helper_param = {
727
.ntmp = 1, .tmp = { TCG_REG_TMP }
728
};
729
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
730
index XXXXXXX..XXXXXXX 100644
731
--- a/tcg/arm/tcg-target.c.inc
732
+++ b/tcg/arm/tcg-target.c.inc
733
@@ -XXX,XX +XXX,XX @@ typedef struct {
734
} HostAddress;
735
736
#ifdef CONFIG_SOFTMMU
737
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
738
- * int mmu_idx, uintptr_t ra)
739
- */
740
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
741
- [MO_UB] = helper_ret_ldub_mmu,
742
- [MO_SB] = helper_ret_ldsb_mmu,
743
-#if HOST_BIG_ENDIAN
744
- [MO_UW] = helper_be_lduw_mmu,
745
- [MO_UL] = helper_be_ldul_mmu,
746
- [MO_UQ] = helper_be_ldq_mmu,
747
- [MO_SW] = helper_be_ldsw_mmu,
748
- [MO_SL] = helper_be_ldul_mmu,
749
-#else
750
- [MO_UW] = helper_le_lduw_mmu,
751
- [MO_UL] = helper_le_ldul_mmu,
752
- [MO_UQ] = helper_le_ldq_mmu,
753
- [MO_SW] = helper_le_ldsw_mmu,
754
- [MO_SL] = helper_le_ldul_mmu,
755
-#endif
756
-};
757
-
758
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
759
- * uintxx_t val, int mmu_idx, uintptr_t ra)
760
- */
761
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
762
- [MO_8] = helper_ret_stb_mmu,
763
-#if HOST_BIG_ENDIAN
764
- [MO_16] = helper_be_stw_mmu,
765
- [MO_32] = helper_be_stl_mmu,
766
- [MO_64] = helper_be_stq_mmu,
767
-#else
768
- [MO_16] = helper_le_stw_mmu,
769
- [MO_32] = helper_le_stl_mmu,
770
- [MO_64] = helper_le_stq_mmu,
771
-#endif
772
-};
773
-
774
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
775
{
776
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
777
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
778
index XXXXXXX..XXXXXXX 100644
779
--- a/tcg/i386/tcg-target.c.inc
780
+++ b/tcg/i386/tcg-target.c.inc
781
@@ -XXX,XX +XXX,XX @@ typedef struct {
782
} HostAddress;
783
784
#if defined(CONFIG_SOFTMMU)
785
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
786
- * int mmu_idx, uintptr_t ra)
787
- */
788
-static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
789
- [MO_UB] = helper_ret_ldub_mmu,
790
- [MO_LEUW] = helper_le_lduw_mmu,
791
- [MO_LEUL] = helper_le_ldul_mmu,
792
- [MO_LEUQ] = helper_le_ldq_mmu,
793
- [MO_BEUW] = helper_be_lduw_mmu,
794
- [MO_BEUL] = helper_be_ldul_mmu,
795
- [MO_BEUQ] = helper_be_ldq_mmu,
796
-};
797
-
798
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
799
- * uintxx_t val, int mmu_idx, uintptr_t ra)
800
- */
801
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
802
- [MO_UB] = helper_ret_stb_mmu,
803
- [MO_LEUW] = helper_le_stw_mmu,
804
- [MO_LEUL] = helper_le_stl_mmu,
805
- [MO_LEUQ] = helper_le_stq_mmu,
806
- [MO_BEUW] = helper_be_stw_mmu,
807
- [MO_BEUL] = helper_be_stl_mmu,
808
- [MO_BEUQ] = helper_be_stq_mmu,
809
-};
810
-
811
/*
812
* Because i686 has no register parameters and because x86_64 has xchg
813
* to handle addr/data register overlap, we have placed all input arguments
814
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
815
}
816
817
tcg_out_ld_helper_args(s, l, &ldst_helper_param);
818
- tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
819
+ tcg_out_branch(s, 1, qemu_ld_helpers[opc & MO_SIZE]);
820
tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
821
822
tcg_out_jmp(s, l->raddr);
823
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
824
}
825
826
tcg_out_st_helper_args(s, l, &ldst_helper_param);
827
- tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
828
+ tcg_out_branch(s, 1, qemu_st_helpers[opc & MO_SIZE]);
829
830
tcg_out_jmp(s, l->raddr);
831
return true;
832
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
833
index XXXXXXX..XXXXXXX 100644
834
--- a/tcg/loongarch64/tcg-target.c.inc
835
+++ b/tcg/loongarch64/tcg-target.c.inc
836
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
837
*/
838
839
#if defined(CONFIG_SOFTMMU)
840
-/*
841
- * helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
842
- * MemOpIdx oi, uintptr_t ra)
843
- */
844
-static void * const qemu_ld_helpers[4] = {
845
- [MO_8] = helper_ret_ldub_mmu,
846
- [MO_16] = helper_le_lduw_mmu,
847
- [MO_32] = helper_le_ldul_mmu,
848
- [MO_64] = helper_le_ldq_mmu,
849
-};
850
-
851
-/*
852
- * helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
853
- * uintxx_t val, MemOpIdx oi,
854
- * uintptr_t ra)
855
- */
856
-static void * const qemu_st_helpers[4] = {
857
- [MO_8] = helper_ret_stb_mmu,
858
- [MO_16] = helper_le_stw_mmu,
859
- [MO_32] = helper_le_stl_mmu,
860
- [MO_64] = helper_le_stq_mmu,
861
-};
862
-
863
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
864
{
865
tcg_out_opc_b(s, 0);
866
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
867
index XXXXXXX..XXXXXXX 100644
868
--- a/tcg/mips/tcg-target.c.inc
869
+++ b/tcg/mips/tcg-target.c.inc
870
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
871
}
872
873
#if defined(CONFIG_SOFTMMU)
874
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
875
- [MO_UB] = helper_ret_ldub_mmu,
876
- [MO_SB] = helper_ret_ldsb_mmu,
877
-#if HOST_BIG_ENDIAN
878
- [MO_UW] = helper_be_lduw_mmu,
879
- [MO_SW] = helper_be_ldsw_mmu,
880
- [MO_UL] = helper_be_ldul_mmu,
881
- [MO_SL] = helper_be_ldsl_mmu,
882
- [MO_UQ] = helper_be_ldq_mmu,
883
-#else
884
- [MO_UW] = helper_le_lduw_mmu,
885
- [MO_SW] = helper_le_ldsw_mmu,
886
- [MO_UL] = helper_le_ldul_mmu,
887
- [MO_UQ] = helper_le_ldq_mmu,
888
- [MO_SL] = helper_le_ldsl_mmu,
889
-#endif
890
-};
891
-
892
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
893
- [MO_UB] = helper_ret_stb_mmu,
894
-#if HOST_BIG_ENDIAN
895
- [MO_UW] = helper_be_stw_mmu,
896
- [MO_UL] = helper_be_stl_mmu,
897
- [MO_UQ] = helper_be_stq_mmu,
898
-#else
899
- [MO_UW] = helper_le_stw_mmu,
900
- [MO_UL] = helper_le_stl_mmu,
901
- [MO_UQ] = helper_le_stq_mmu,
902
-#endif
903
-};
904
-
905
/* We have four temps, we might as well expose three of them. */
906
static const TCGLdstHelperParam ldst_helper_param = {
907
.ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
908
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
909
index XXXXXXX..XXXXXXX 100644
910
--- a/tcg/ppc/tcg-target.c.inc
911
+++ b/tcg/ppc/tcg-target.c.inc
912
@@ -XXX,XX +XXX,XX @@ static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
913
};
914
915
#if defined (CONFIG_SOFTMMU)
916
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
917
- * int mmu_idx, uintptr_t ra)
918
- */
919
-static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
920
- [MO_UB] = helper_ret_ldub_mmu,
921
- [MO_LEUW] = helper_le_lduw_mmu,
922
- [MO_LEUL] = helper_le_ldul_mmu,
923
- [MO_LEUQ] = helper_le_ldq_mmu,
924
- [MO_BEUW] = helper_be_lduw_mmu,
925
- [MO_BEUL] = helper_be_ldul_mmu,
926
- [MO_BEUQ] = helper_be_ldq_mmu,
927
-};
928
-
929
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
930
- * uintxx_t val, int mmu_idx, uintptr_t ra)
931
- */
932
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
933
- [MO_UB] = helper_ret_stb_mmu,
934
- [MO_LEUW] = helper_le_stw_mmu,
935
- [MO_LEUL] = helper_le_stl_mmu,
936
- [MO_LEUQ] = helper_le_stq_mmu,
937
- [MO_BEUW] = helper_be_stw_mmu,
938
- [MO_BEUL] = helper_be_stl_mmu,
939
- [MO_BEUQ] = helper_be_stq_mmu,
940
-};
941
-
942
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
943
{
944
if (arg < 0) {
945
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
946
}
947
948
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
949
- tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
950
+ tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
951
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
952
953
tcg_out_b(s, 0, lb->raddr);
954
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
955
}
956
957
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
958
- tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
959
+ tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
960
961
tcg_out_b(s, 0, lb->raddr);
962
return true;
963
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
964
index XXXXXXX..XXXXXXX 100644
965
--- a/tcg/riscv/tcg-target.c.inc
966
+++ b/tcg/riscv/tcg-target.c.inc
967
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
968
*/
969
970
#if defined(CONFIG_SOFTMMU)
971
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
972
- * MemOpIdx oi, uintptr_t ra)
973
- */
974
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
975
- [MO_UB] = helper_ret_ldub_mmu,
976
- [MO_SB] = helper_ret_ldsb_mmu,
977
-#if HOST_BIG_ENDIAN
978
- [MO_UW] = helper_be_lduw_mmu,
979
- [MO_SW] = helper_be_ldsw_mmu,
980
- [MO_UL] = helper_be_ldul_mmu,
981
-#if TCG_TARGET_REG_BITS == 64
982
- [MO_SL] = helper_be_ldsl_mmu,
983
-#endif
984
- [MO_UQ] = helper_be_ldq_mmu,
985
-#else
986
- [MO_UW] = helper_le_lduw_mmu,
987
- [MO_SW] = helper_le_ldsw_mmu,
988
- [MO_UL] = helper_le_ldul_mmu,
989
-#if TCG_TARGET_REG_BITS == 64
990
- [MO_SL] = helper_le_ldsl_mmu,
991
-#endif
992
- [MO_UQ] = helper_le_ldq_mmu,
993
-#endif
994
-};
995
-
996
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
997
- * uintxx_t val, MemOpIdx oi,
998
- * uintptr_t ra)
999
- */
1000
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
1001
- [MO_8] = helper_ret_stb_mmu,
1002
-#if HOST_BIG_ENDIAN
1003
- [MO_16] = helper_be_stw_mmu,
1004
- [MO_32] = helper_be_stl_mmu,
1005
- [MO_64] = helper_be_stq_mmu,
1006
-#else
1007
- [MO_16] = helper_le_stw_mmu,
1008
- [MO_32] = helper_le_stl_mmu,
1009
- [MO_64] = helper_le_stq_mmu,
1010
-#endif
1011
-};
1012
-
1013
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1014
{
1015
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
1016
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
1017
index XXXXXXX..XXXXXXX 100644
1018
--- a/tcg/s390x/tcg-target.c.inc
1019
+++ b/tcg/s390x/tcg-target.c.inc
1020
@@ -XXX,XX +XXX,XX @@ static const uint8_t tcg_cond_to_ltr_cond[] = {
1021
[TCG_COND_GEU] = S390_CC_ALWAYS,
1022
};
1023
1024
-#ifdef CONFIG_SOFTMMU
1025
-static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
1026
- [MO_UB] = helper_ret_ldub_mmu,
1027
- [MO_SB] = helper_ret_ldsb_mmu,
1028
- [MO_LEUW] = helper_le_lduw_mmu,
1029
- [MO_LESW] = helper_le_ldsw_mmu,
1030
- [MO_LEUL] = helper_le_ldul_mmu,
1031
- [MO_LESL] = helper_le_ldsl_mmu,
1032
- [MO_LEUQ] = helper_le_ldq_mmu,
1033
- [MO_BEUW] = helper_be_lduw_mmu,
1034
- [MO_BESW] = helper_be_ldsw_mmu,
1035
- [MO_BEUL] = helper_be_ldul_mmu,
1036
- [MO_BESL] = helper_be_ldsl_mmu,
1037
- [MO_BEUQ] = helper_be_ldq_mmu,
1038
-};
1039
-
1040
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
1041
- [MO_UB] = helper_ret_stb_mmu,
1042
- [MO_LEUW] = helper_le_stw_mmu,
1043
- [MO_LEUL] = helper_le_stl_mmu,
1044
- [MO_LEUQ] = helper_le_stq_mmu,
1045
- [MO_BEUW] = helper_be_stw_mmu,
1046
- [MO_BEUL] = helper_be_stl_mmu,
1047
- [MO_BEUQ] = helper_be_stq_mmu,
1048
-};
1049
-#endif
1050
-
1051
static const tcg_insn_unit *tb_ret_addr;
1052
uint64_t s390_facilities[3];
1053
1054
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1055
}
1056
1057
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1058
- tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1059
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1060
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1061
1062
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1063
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1064
}
1065
1066
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1067
- tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1068
+ tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1069
1070
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1071
return true;
1072
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
1073
index XXXXXXX..XXXXXXX 100644
1074
--- a/tcg/sparc64/tcg-target.c.inc
1075
+++ b/tcg/sparc64/tcg-target.c.inc
1076
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
1077
}
1078
1079
#ifdef CONFIG_SOFTMMU
1080
-static const tcg_insn_unit *qemu_ld_trampoline[(MO_SSIZE | MO_BSWAP) + 1];
1081
-static const tcg_insn_unit *qemu_st_trampoline[(MO_SIZE | MO_BSWAP) + 1];
1082
+static const tcg_insn_unit *qemu_ld_trampoline[MO_SSIZE + 1];
1083
+static const tcg_insn_unit *qemu_st_trampoline[MO_SIZE + 1];
1084
1085
static void build_trampolines(TCGContext *s)
1086
{
1087
- static void * const qemu_ld_helpers[] = {
1088
- [MO_UB] = helper_ret_ldub_mmu,
1089
- [MO_SB] = helper_ret_ldsb_mmu,
1090
- [MO_LEUW] = helper_le_lduw_mmu,
1091
- [MO_LESW] = helper_le_ldsw_mmu,
1092
- [MO_LEUL] = helper_le_ldul_mmu,
1093
- [MO_LEUQ] = helper_le_ldq_mmu,
1094
- [MO_BEUW] = helper_be_lduw_mmu,
1095
- [MO_BESW] = helper_be_ldsw_mmu,
1096
- [MO_BEUL] = helper_be_ldul_mmu,
1097
- [MO_BEUQ] = helper_be_ldq_mmu,
1098
- };
1099
- static void * const qemu_st_helpers[] = {
1100
- [MO_UB] = helper_ret_stb_mmu,
1101
- [MO_LEUW] = helper_le_stw_mmu,
1102
- [MO_LEUL] = helper_le_stl_mmu,
1103
- [MO_LEUQ] = helper_le_stq_mmu,
1104
- [MO_BEUW] = helper_be_stw_mmu,
1105
- [MO_BEUL] = helper_be_stl_mmu,
1106
- [MO_BEUQ] = helper_be_stq_mmu,
1107
- };
1108
-
1109
int i;
1110
1111
for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
1112
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
1113
/* We use the helpers to extend SB and SW data, leaving the case
1114
of SL needing explicit extending below. */
1115
if ((memop & MO_SSIZE) == MO_SL) {
1116
- func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1117
+ func = qemu_ld_trampoline[MO_UL];
1118
} else {
1119
- func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
1120
+ func = qemu_ld_trampoline[memop & MO_SSIZE];
1121
}
1122
tcg_debug_assert(func != NULL);
1123
tcg_out_call_nodelay(s, func, false);
1124
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
1125
tcg_out_movext(s, (memop & MO_SIZE) == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
1126
TCG_REG_O2, data_type, memop & MO_SIZE, data);
1127
1128
- func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1129
+ func = qemu_st_trampoline[memop & MO_SIZE];
1130
tcg_debug_assert(func != NULL);
1131
tcg_out_call_nodelay(s, func, false);
1132
/* delay slot */
--
2.34.1

TCG backends may need to defer to a helper to implement
the atomicity required by a given operation. Mirror the
interface used in system mode.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg-ldst.h |   6 +-
 accel/tcg/user-exec.c  | 393 ++++++++++++++++++++++++++++-------------
 tcg/tcg.c              |   6 +-
 3 files changed, 278 insertions(+), 127 deletions(-)

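As an illustration (condensed from the accel/tcg/user-exec.c hunks below, not
extra patch content): each user-only helper is a host-endian access through
the guest-to-host lookup plus a conditional byte swap selected by the MemOp,
e.g.

    tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
                                     MemOpIdx oi, uintptr_t ra)
    {
        MemOp mop = get_memop(oi);
        uint16_t ret = do_ld2_he_mmu(env, addr, mop, ra);  /* host-endian load */

        if (mop & MO_BSWAP) {
            ret = bswap16(ret);  /* requested order differs from host order */
        }
        return ret;
    }
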
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-ldst.h
16
+++ b/include/tcg/tcg-ldst.h
17
@@ -XXX,XX +XXX,XX @@
18
#ifndef TCG_LDST_H
19
#define TCG_LDST_H
20
21
-#ifdef CONFIG_SOFTMMU
22
-
23
/* Value zero-extended to tcg register size. */
24
tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
25
MemOpIdx oi, uintptr_t retaddr);
26
@@ -XXX,XX +XXX,XX @@ void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
27
void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
28
MemOpIdx oi, uintptr_t retaddr);
29
30
-#else
31
+#ifdef CONFIG_USER_ONLY
32
33
G_NORETURN void helper_unaligned_ld(CPUArchState *env, target_ulong addr);
34
G_NORETURN void helper_unaligned_st(CPUArchState *env, target_ulong addr);
35
36
-#endif /* CONFIG_SOFTMMU */
37
+#endif /* CONFIG_USER_ONLY */
38
#endif /* TCG_LDST_H */
39
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/accel/tcg/user-exec.c
42
+++ b/accel/tcg/user-exec.c
43
@@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong last) { }
44
45
/* The softmmu versions of these helpers are in cputlb.c. */
46
47
-/*
48
- * Verify that we have passed the correct MemOp to the correct function.
49
- *
50
- * We could present one function to target code, and dispatch based on
51
- * the MemOp, but so far we have worked hard to avoid an indirect function
52
- * call along the memory path.
53
- */
54
-static void validate_memop(MemOpIdx oi, MemOp expected)
55
-{
56
-#ifdef CONFIG_DEBUG_TCG
57
- MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP);
58
- assert(have == expected);
59
-#endif
60
-}
61
-
62
void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
63
{
64
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
65
@@ -XXX,XX +XXX,XX @@ void helper_unaligned_st(CPUArchState *env, target_ulong addr)
66
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
67
}
68
69
-static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
70
- MemOpIdx oi, uintptr_t ra, MMUAccessType type)
71
+static void *cpu_mmu_lookup(CPUArchState *env, abi_ptr addr,
72
+ MemOp mop, uintptr_t ra, MMUAccessType type)
73
{
74
- MemOp mop = get_memop(oi);
75
int a_bits = get_alignment_bits(mop);
76
void *ret;
77
78
@@ -XXX,XX +XXX,XX @@ static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
79
80
#include "ldst_atomicity.c.inc"
81
82
-uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
83
- MemOpIdx oi, uintptr_t ra)
84
+static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
85
+ MemOp mop, uintptr_t ra)
86
{
87
void *haddr;
88
uint8_t ret;
89
90
- validate_memop(oi, MO_UB);
91
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
92
+ tcg_debug_assert((mop & MO_SIZE) == MO_8);
93
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
94
ret = ldub_p(haddr);
95
clear_helper_retaddr();
96
+ return ret;
97
+}
98
+
99
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
100
+ MemOpIdx oi, uintptr_t ra)
101
+{
102
+ return do_ld1_mmu(env, addr, get_memop(oi), ra);
103
+}
104
+
105
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
106
+ MemOpIdx oi, uintptr_t ra)
107
+{
108
+ return (int8_t)do_ld1_mmu(env, addr, get_memop(oi), ra);
109
+}
110
+
111
+uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
112
+ MemOpIdx oi, uintptr_t ra)
113
+{
114
+ uint8_t ret = do_ld1_mmu(env, addr, get_memop(oi), ra);
115
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
116
return ret;
117
}
118
119
+static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
120
+ MemOp mop, uintptr_t ra)
121
+{
122
+ void *haddr;
123
+ uint16_t ret;
124
+
125
+ tcg_debug_assert((mop & MO_SIZE) == MO_16);
126
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
127
+ ret = load_atom_2(env, ra, haddr, mop);
128
+ clear_helper_retaddr();
129
+ return ret;
130
+}
131
+
132
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
133
+ MemOpIdx oi, uintptr_t ra)
134
+{
135
+ MemOp mop = get_memop(oi);
136
+ uint16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
137
+
138
+ if (mop & MO_BSWAP) {
139
+ ret = bswap16(ret);
140
+ }
141
+ return ret;
142
+}
143
+
144
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
145
+ MemOpIdx oi, uintptr_t ra)
146
+{
147
+ MemOp mop = get_memop(oi);
148
+ int16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
149
+
150
+ if (mop & MO_BSWAP) {
151
+ ret = bswap16(ret);
152
+ }
153
+ return ret;
154
+}
155
+
156
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
157
MemOpIdx oi, uintptr_t ra)
158
{
159
- void *haddr;
160
+ MemOp mop = get_memop(oi);
161
uint16_t ret;
162
163
- validate_memop(oi, MO_BEUW);
164
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
165
- ret = load_atom_2(env, ra, haddr, get_memop(oi));
166
- clear_helper_retaddr();
167
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
168
+ ret = do_ld2_he_mmu(env, addr, mop, ra);
169
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
170
return cpu_to_be16(ret);
171
}
172
173
-uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
174
- MemOpIdx oi, uintptr_t ra)
175
-{
176
- void *haddr;
177
- uint32_t ret;
178
-
179
- validate_memop(oi, MO_BEUL);
180
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
181
- ret = load_atom_4(env, ra, haddr, get_memop(oi));
182
- clear_helper_retaddr();
183
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
184
- return cpu_to_be32(ret);
185
-}
186
-
187
-uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
188
- MemOpIdx oi, uintptr_t ra)
189
-{
190
- void *haddr;
191
- uint64_t ret;
192
-
193
- validate_memop(oi, MO_BEUQ);
194
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
195
- ret = load_atom_8(env, ra, haddr, get_memop(oi));
196
- clear_helper_retaddr();
197
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
198
- return cpu_to_be64(ret);
199
-}
200
-
201
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
202
MemOpIdx oi, uintptr_t ra)
203
{
204
- void *haddr;
205
+ MemOp mop = get_memop(oi);
206
uint16_t ret;
207
208
- validate_memop(oi, MO_LEUW);
209
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
210
- ret = load_atom_2(env, ra, haddr, get_memop(oi));
211
- clear_helper_retaddr();
212
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
213
+ ret = do_ld2_he_mmu(env, addr, mop, ra);
214
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
215
return cpu_to_le16(ret);
216
}
217
218
+static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
219
+ MemOp mop, uintptr_t ra)
220
+{
221
+ void *haddr;
222
+ uint32_t ret;
223
+
224
+ tcg_debug_assert((mop & MO_SIZE) == MO_32);
225
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
226
+ ret = load_atom_4(env, ra, haddr, mop);
227
+ clear_helper_retaddr();
228
+ return ret;
229
+}
230
+
231
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
232
+ MemOpIdx oi, uintptr_t ra)
233
+{
234
+ MemOp mop = get_memop(oi);
235
+ uint32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
236
+
237
+ if (mop & MO_BSWAP) {
238
+ ret = bswap32(ret);
239
+ }
240
+ return ret;
241
+}
242
+
243
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
244
+ MemOpIdx oi, uintptr_t ra)
245
+{
246
+ MemOp mop = get_memop(oi);
247
+ int32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
248
+
249
+ if (mop & MO_BSWAP) {
250
+ ret = bswap32(ret);
251
+ }
252
+ return ret;
253
+}
254
+
255
+uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
256
+ MemOpIdx oi, uintptr_t ra)
257
+{
258
+ MemOp mop = get_memop(oi);
259
+ uint32_t ret;
260
+
261
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
262
+ ret = do_ld4_he_mmu(env, addr, mop, ra);
263
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
264
+ return cpu_to_be32(ret);
265
+}
266
+
267
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
268
MemOpIdx oi, uintptr_t ra)
269
{
270
- void *haddr;
271
+ MemOp mop = get_memop(oi);
272
uint32_t ret;
273
274
- validate_memop(oi, MO_LEUL);
275
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
276
- ret = load_atom_4(env, ra, haddr, get_memop(oi));
277
- clear_helper_retaddr();
278
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
279
+ ret = do_ld4_he_mmu(env, addr, mop, ra);
280
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
281
return cpu_to_le32(ret);
282
}
283
284
+static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
285
+ MemOp mop, uintptr_t ra)
286
+{
287
+ void *haddr;
288
+ uint64_t ret;
289
+
290
+ tcg_debug_assert((mop & MO_SIZE) == MO_64);
291
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
292
+ ret = load_atom_8(env, ra, haddr, mop);
293
+ clear_helper_retaddr();
294
+ return ret;
295
+}
296
+
297
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
298
+ MemOpIdx oi, uintptr_t ra)
299
+{
300
+ MemOp mop = get_memop(oi);
301
+ uint64_t ret = do_ld8_he_mmu(env, addr, mop, ra);
302
+
303
+ if (mop & MO_BSWAP) {
304
+ ret = bswap64(ret);
305
+ }
306
+ return ret;
307
+}
308
+
309
+uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
310
+ MemOpIdx oi, uintptr_t ra)
311
+{
312
+ MemOp mop = get_memop(oi);
313
+ uint64_t ret;
314
+
315
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
316
+ ret = do_ld8_he_mmu(env, addr, mop, ra);
317
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
318
+ return cpu_to_be64(ret);
319
+}
320
+
321
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
322
MemOpIdx oi, uintptr_t ra)
323
{
324
- void *haddr;
325
+ MemOp mop = get_memop(oi);
326
uint64_t ret;
327
328
- validate_memop(oi, MO_LEUQ);
329
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
330
- ret = load_atom_8(env, ra, haddr, get_memop(oi));
331
- clear_helper_retaddr();
332
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
333
+ ret = do_ld8_he_mmu(env, addr, mop, ra);
334
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
335
return cpu_to_le64(ret);
336
}
337
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
338
void *haddr;
339
Int128 ret;
340
341
- validate_memop(oi, MO_128 | MO_BE);
342
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
343
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
344
memcpy(&ret, haddr, 16);
345
clear_helper_retaddr();
346
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
347
void *haddr;
348
Int128 ret;
349
350
- validate_memop(oi, MO_128 | MO_LE);
351
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
352
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
353
memcpy(&ret, haddr, 16);
354
clear_helper_retaddr();
355
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
356
return ret;
357
}
358
359
-void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
360
- MemOpIdx oi, uintptr_t ra)
361
+static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
362
+ MemOp mop, uintptr_t ra)
363
{
364
void *haddr;
365
366
- validate_memop(oi, MO_UB);
367
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
368
+ tcg_debug_assert((mop & MO_SIZE) == MO_8);
369
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
370
stb_p(haddr, val);
371
clear_helper_retaddr();
372
+}
373
+
374
+void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
375
+ MemOpIdx oi, uintptr_t ra)
376
+{
377
+ do_st1_mmu(env, addr, val, get_memop(oi), ra);
378
+}
379
+
380
+void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
381
+ MemOpIdx oi, uintptr_t ra)
382
+{
383
+ do_st1_mmu(env, addr, val, get_memop(oi), ra);
384
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
385
}
386
387
+static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
388
+ MemOp mop, uintptr_t ra)
389
+{
390
+ void *haddr;
391
+
392
+ tcg_debug_assert((mop & MO_SIZE) == MO_16);
393
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
394
+ store_atom_2(env, ra, haddr, mop, val);
395
+ clear_helper_retaddr();
396
+}
397
+
398
+void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
399
+ MemOpIdx oi, uintptr_t ra)
400
+{
401
+ MemOp mop = get_memop(oi);
402
+
403
+ if (mop & MO_BSWAP) {
404
+ val = bswap16(val);
405
+ }
406
+ do_st2_he_mmu(env, addr, val, mop, ra);
407
+}
408
+
409
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
410
MemOpIdx oi, uintptr_t ra)
411
{
412
- void *haddr;
413
+ MemOp mop = get_memop(oi);
414
415
- validate_memop(oi, MO_BEUW);
416
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
417
- store_atom_2(env, ra, haddr, get_memop(oi), be16_to_cpu(val));
418
- clear_helper_retaddr();
419
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
420
-}
421
-
422
-void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
423
- MemOpIdx oi, uintptr_t ra)
424
-{
425
- void *haddr;
426
-
427
- validate_memop(oi, MO_BEUL);
428
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
429
- store_atom_4(env, ra, haddr, get_memop(oi), be32_to_cpu(val));
430
- clear_helper_retaddr();
431
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
432
-}
433
-
434
-void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
435
- MemOpIdx oi, uintptr_t ra)
436
-{
437
- void *haddr;
438
-
439
- validate_memop(oi, MO_BEUQ);
440
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
441
- store_atom_8(env, ra, haddr, get_memop(oi), be64_to_cpu(val));
442
- clear_helper_retaddr();
443
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
444
+ do_st2_he_mmu(env, addr, be16_to_cpu(val), mop, ra);
445
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
446
}
447
448
void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
449
MemOpIdx oi, uintptr_t ra)
450
+{
451
+ MemOp mop = get_memop(oi);
452
+
453
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
454
+ do_st2_he_mmu(env, addr, le16_to_cpu(val), mop, ra);
455
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
456
+}
457
+
458
+static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
459
+ MemOp mop, uintptr_t ra)
460
{
461
void *haddr;
462
463
- validate_memop(oi, MO_LEUW);
464
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
465
- store_atom_2(env, ra, haddr, get_memop(oi), le16_to_cpu(val));
466
+ tcg_debug_assert((mop & MO_SIZE) == MO_32);
467
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
468
+ store_atom_4(env, ra, haddr, mop, val);
469
clear_helper_retaddr();
470
+}
471
+
472
+void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
473
+ MemOpIdx oi, uintptr_t ra)
474
+{
475
+ MemOp mop = get_memop(oi);
476
+
477
+ if (mop & MO_BSWAP) {
478
+ val = bswap32(val);
479
+ }
480
+ do_st4_he_mmu(env, addr, val, mop, ra);
481
+}
482
+
483
+void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
484
+ MemOpIdx oi, uintptr_t ra)
485
+{
486
+ MemOp mop = get_memop(oi);
487
+
488
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
489
+ do_st4_he_mmu(env, addr, be32_to_cpu(val), mop, ra);
490
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
491
}
492
493
void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
494
MemOpIdx oi, uintptr_t ra)
495
+{
496
+ MemOp mop = get_memop(oi);
497
+
498
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
499
+ do_st4_he_mmu(env, addr, le32_to_cpu(val), mop, ra);
500
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
501
+}
502
+
503
+static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
504
+ MemOp mop, uintptr_t ra)
505
{
506
void *haddr;
507
508
- validate_memop(oi, MO_LEUL);
509
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
510
- store_atom_4(env, ra, haddr, get_memop(oi), le32_to_cpu(val));
511
+ tcg_debug_assert((mop & MO_SIZE) == MO_64);
512
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
513
+ store_atom_8(env, ra, haddr, mop, val);
514
clear_helper_retaddr();
515
+}
516
+
517
+void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
518
+ MemOpIdx oi, uintptr_t ra)
519
+{
520
+ MemOp mop = get_memop(oi);
521
+
522
+ if (mop & MO_BSWAP) {
523
+ val = bswap64(val);
524
+ }
525
+ do_st8_he_mmu(env, addr, val, mop, ra);
526
+}
527
+
528
+void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
529
+ MemOpIdx oi, uintptr_t ra)
530
+{
531
+ MemOp mop = get_memop(oi);
532
+
533
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
534
+ do_st8_he_mmu(env, addr, cpu_to_be64(val), mop, ra);
535
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
536
}
537
538
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
539
MemOpIdx oi, uintptr_t ra)
540
{
541
- void *haddr;
542
+ MemOp mop = get_memop(oi);
543
544
- validate_memop(oi, MO_LEUQ);
545
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
546
- store_atom_8(env, ra, haddr, get_memop(oi), le64_to_cpu(val));
547
- clear_helper_retaddr();
548
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
549
+ do_st8_he_mmu(env, addr, cpu_to_le64(val), mop, ra);
550
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
551
}
552
553
@@ -XXX,XX +XXX,XX @@ void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
554
{
555
void *haddr;
556
557
- validate_memop(oi, MO_128 | MO_BE);
558
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
559
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
560
if (!HOST_BIG_ENDIAN) {
561
val = bswap128(val);
562
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
563
{
564
void *haddr;
565
566
- validate_memop(oi, MO_128 | MO_LE);
567
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
568
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
569
if (HOST_BIG_ENDIAN) {
570
val = bswap128(val);
571
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
572
void *haddr;
573
uint64_t ret;
574
575
- validate_memop(oi, MO_BEUQ);
576
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
577
ret = ldq_p(haddr);
578
clear_helper_retaddr();
579
diff --git a/tcg/tcg.c b/tcg/tcg.c
580
index XXXXXXX..XXXXXXX 100644
581
--- a/tcg/tcg.c
582
+++ b/tcg/tcg.c
583
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
584
const TCGLdstHelperParam *p)
585
__attribute__((unused));
586
587
-#ifdef CONFIG_SOFTMMU
588
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
589
+static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
590
[MO_UB] = helper_ldub_mmu,
591
[MO_SB] = helper_ldsb_mmu,
592
[MO_UW] = helper_lduw_mmu,
593
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
594
#endif
595
};
596
597
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
598
+static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
599
[MO_8] = helper_stb_mmu,
600
[MO_16] = helper_stw_mmu,
601
[MO_32] = helper_stl_mmu,
602
[MO_64] = helper_stq_mmu,
603
};
604
-#endif
605
606
TCGContext tcg_init_ctx;
607
__thread TCGContext *tcg_ctx;
--
2.34.1

We can now fold these two pieces of code.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tci.c | 89 -------------------------------------------------------
 1 file changed, 89 deletions(-)

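For illustration (the shape of what remains, per the hunks below; full
parameter list as declared in tcg/tci.c): the TCI load path now dispatches to
the shared helpers in both softmmu and user-only builds:

    static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
                                MemOpIdx oi, const void *tb_ptr)
    {
        MemOp mop = get_memop(oi);
        uintptr_t ra = (uintptr_t)tb_ptr;

        switch (mop & MO_SSIZE) {
        case MO_UB:
            return helper_ldub_mmu(env, taddr, oi, ra);
        /* ... the remaining sizes dispatch to their helpers likewise ... */
        default:
            g_assert_not_reached();
        }
    }
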
diff --git a/tcg/tci.c b/tcg/tci.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/tci.c
12
+++ b/tcg/tci.c
13
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
14
MemOp mop = get_memop(oi);
15
uintptr_t ra = (uintptr_t)tb_ptr;
16
17
-#ifdef CONFIG_SOFTMMU
18
switch (mop & MO_SSIZE) {
19
case MO_UB:
20
return helper_ldub_mmu(env, taddr, oi, ra);
21
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
22
default:
23
g_assert_not_reached();
24
}
25
-#else
26
- void *haddr = g2h(env_cpu(env), taddr);
27
- unsigned a_mask = (1u << get_alignment_bits(mop)) - 1;
28
- uint64_t ret;
29
-
30
- set_helper_retaddr(ra);
31
- if (taddr & a_mask) {
32
- helper_unaligned_ld(env, taddr);
33
- }
34
- switch (mop & (MO_BSWAP | MO_SSIZE)) {
35
- case MO_UB:
36
- ret = ldub_p(haddr);
37
- break;
38
- case MO_SB:
39
- ret = ldsb_p(haddr);
40
- break;
41
- case MO_LEUW:
42
- ret = lduw_le_p(haddr);
43
- break;
44
- case MO_LESW:
45
- ret = ldsw_le_p(haddr);
46
- break;
47
- case MO_LEUL:
48
- ret = (uint32_t)ldl_le_p(haddr);
49
- break;
50
- case MO_LESL:
51
- ret = (int32_t)ldl_le_p(haddr);
52
- break;
53
- case MO_LEUQ:
54
- ret = ldq_le_p(haddr);
55
- break;
56
- case MO_BEUW:
57
- ret = lduw_be_p(haddr);
58
- break;
59
- case MO_BESW:
60
- ret = ldsw_be_p(haddr);
61
- break;
62
- case MO_BEUL:
63
- ret = (uint32_t)ldl_be_p(haddr);
64
- break;
65
- case MO_BESL:
66
- ret = (int32_t)ldl_be_p(haddr);
67
- break;
68
- case MO_BEUQ:
69
- ret = ldq_be_p(haddr);
70
- break;
71
- default:
72
- g_assert_not_reached();
73
- }
74
- clear_helper_retaddr();
75
- return ret;
76
-#endif
77
}
78
79
static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
80
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
81
MemOp mop = get_memop(oi);
82
uintptr_t ra = (uintptr_t)tb_ptr;
83
84
-#ifdef CONFIG_SOFTMMU
85
switch (mop & MO_SIZE) {
86
case MO_UB:
87
helper_stb_mmu(env, taddr, val, oi, ra);
88
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
89
default:
90
g_assert_not_reached();
91
}
92
-#else
93
- void *haddr = g2h(env_cpu(env), taddr);
94
- unsigned a_mask = (1u << get_alignment_bits(mop)) - 1;
95
-
96
- set_helper_retaddr(ra);
97
- if (taddr & a_mask) {
98
- helper_unaligned_st(env, taddr);
99
- }
100
- switch (mop & (MO_BSWAP | MO_SIZE)) {
101
- case MO_UB:
102
- stb_p(haddr, val);
103
- break;
104
- case MO_LEUW:
105
- stw_le_p(haddr, val);
106
- break;
107
- case MO_LEUL:
108
- stl_le_p(haddr, val);
109
- break;
110
- case MO_LEUQ:
111
- stq_le_p(haddr, val);
112
- break;
113
- case MO_BEUW:
114
- stw_be_p(haddr, val);
115
- break;
116
- case MO_BEUL:
117
- stl_be_p(haddr, val);
118
- break;
119
- case MO_BEUQ:
120
- stq_be_p(haddr, val);
121
- break;
122
- default:
123
- g_assert_not_reached();
124
- }
125
- clear_helper_retaddr();
126
-#endif
127
}
128
129
#if TCG_TARGET_REG_BITS == 64
--
2.34.1

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/tcg-runtime.h        |   3 +
 include/tcg/tcg-ldst.h         |   4 +
 accel/tcg/cputlb.c             | 399 +++++++++++++++++++++++++--------
 accel/tcg/user-exec.c          |  94 ++++++--
 tcg/tcg-op.c                   | 173 +++++++++-----
 accel/tcg/ldst_atomicity.c.inc | 184 +++++++++++++++
 6 files changed, 679 insertions(+), 178 deletions(-)

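For orientation, the new 128-bit entry points declared in the
include/tcg/tcg-ldst.h hunk below are:

    Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
                           MemOpIdx oi, uintptr_t retaddr);
    void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
                         MemOpIdx oi, uintptr_t retaddr);

Where the host cannot provide the required atomicity, the slow paths in
ldst_atomicity.c.inc fall back to cpu_loop_exit_atomic(), as in the existing
2/4/8-byte cases.
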
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/tcg-runtime.h
15
+++ b/accel/tcg/tcg-runtime.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
17
DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
18
#endif /* IN_HELPER_PROTO */
19
20
+DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, tl, i32)
21
+DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, tl, i128, i32)
22
+
23
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
24
i32, env, tl, i32, i32, i32)
25
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
26
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
27
index XXXXXXX..XXXXXXX 100644
28
--- a/include/tcg/tcg-ldst.h
29
+++ b/include/tcg/tcg-ldst.h
30
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
31
MemOpIdx oi, uintptr_t retaddr);
32
uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
33
MemOpIdx oi, uintptr_t retaddr);
34
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
35
+ MemOpIdx oi, uintptr_t retaddr);
36
37
/* Value sign-extended to tcg register size. */
38
tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
39
@@ -XXX,XX +XXX,XX @@ void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
40
MemOpIdx oi, uintptr_t retaddr);
41
void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
42
MemOpIdx oi, uintptr_t retaddr);
43
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
44
+ MemOpIdx oi, uintptr_t retaddr);
45
46
#ifdef CONFIG_USER_ONLY
47
48
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/accel/tcg/cputlb.c
51
+++ b/accel/tcg/cputlb.c
52
@@ -XXX,XX +XXX,XX @@
53
#include "qemu/plugin-memory.h"
54
#endif
55
#include "tcg/tcg-ldst.h"
56
+#include "exec/helper-proto.h"
57
58
/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
59
/* #define DEBUG_TLB */
60
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_whole_be8(CPUArchState *env, uintptr_t ra,
61
return (ret_be << (p->size * 8)) | x;
62
}
63
64
+/**
65
+ * do_ld_whole_be16
66
+ * @p: translation parameters
67
+ * @ret_be: accumulated data
68
+ *
69
+ * As do_ld_bytes_beN, but with one atomic load.
70
+ * 16 aligned bytes are guaranteed to cover the load.
71
+ */
72
+static Int128 do_ld_whole_be16(CPUArchState *env, uintptr_t ra,
73
+ MMULookupPageData *p, uint64_t ret_be)
74
+{
75
+ int o = p->addr & 15;
76
+ Int128 x, y = load_atomic16_or_exit(env, ra, p->haddr - o);
77
+ int size = p->size;
78
+
79
+ if (!HOST_BIG_ENDIAN) {
80
+ y = bswap128(y);
81
+ }
82
+ y = int128_lshift(y, o * 8);
83
+ y = int128_urshift(y, (16 - size) * 8);
84
+ x = int128_make64(ret_be);
85
+ x = int128_lshift(x, size * 8);
86
+ return int128_or(x, y);
87
+}
88
+
89
/*
90
* Wrapper for the above.
91
*/
92
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
93
}
94
}
95
96
+/*
97
+ * Wrapper for the above, for 8 < size < 16.
98
+ */
99
+static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p,
100
+ uint64_t a, int mmu_idx, MemOp mop, uintptr_t ra)
101
+{
102
+ int size = p->size;
103
+ uint64_t b;
104
+ MemOp atom;
105
+
106
+ if (unlikely(p->flags & TLB_MMIO)) {
107
+ p->size = size - 8;
108
+ a = do_ld_mmio_beN(env, p, a, mmu_idx, MMU_DATA_LOAD, ra);
109
+ p->addr += p->size;
110
+ p->size = 8;
111
+ b = do_ld_mmio_beN(env, p, 0, mmu_idx, MMU_DATA_LOAD, ra);
112
+ return int128_make128(b, a);
113
+ }
114
+
115
+ /*
116
+ * It is a given that we cross a page and therefore there is no
117
+ * atomicity for the load as a whole, but subobjects may need attention.
118
+ */
119
+ atom = mop & MO_ATOM_MASK;
120
+ switch (atom) {
121
+ case MO_ATOM_SUBALIGN:
122
+ p->size = size - 8;
123
+ a = do_ld_parts_beN(p, a);
124
+ p->haddr += size - 8;
125
+ p->size = 8;
126
+ b = do_ld_parts_beN(p, 0);
127
+ break;
128
+
129
+ case MO_ATOM_WITHIN16_PAIR:
130
+ /* Since size > 8, this is the half that must be atomic. */
131
+ return do_ld_whole_be16(env, ra, p, a);
132
+
133
+ case MO_ATOM_IFALIGN_PAIR:
134
+ /*
135
+ * Since size > 8, both halves are misaligned,
136
+ * and so neither is atomic.
137
+ */
138
+ case MO_ATOM_IFALIGN:
139
+ case MO_ATOM_WITHIN16:
140
+ case MO_ATOM_NONE:
141
+ p->size = size - 8;
142
+ a = do_ld_bytes_beN(p, a);
143
+ b = ldq_be_p(p->haddr + size - 8);
144
+ break;
145
+
146
+ default:
147
+ g_assert_not_reached();
148
+ }
149
+
150
+ return int128_make128(b, a);
151
+}
152
+
153
static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
154
MMUAccessType type, uintptr_t ra)
155
{
156
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
157
return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
158
}
159
160
+static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr,
161
+ MemOpIdx oi, uintptr_t ra)
162
+{
163
+ MMULookupLocals l;
164
+ bool crosspage;
165
+ uint64_t a, b;
166
+ Int128 ret;
167
+ int first;
168
+
169
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
170
+ if (likely(!crosspage)) {
171
+ /* Perform the load host endian. */
172
+ if (unlikely(l.page[0].flags & TLB_MMIO)) {
173
+ QEMU_IOTHREAD_LOCK_GUARD();
174
+ a = io_readx(env, l.page[0].full, l.mmu_idx, addr,
175
+ ra, MMU_DATA_LOAD, MO_64);
176
+ b = io_readx(env, l.page[0].full, l.mmu_idx, addr + 8,
177
+ ra, MMU_DATA_LOAD, MO_64);
178
+ ret = int128_make128(HOST_BIG_ENDIAN ? b : a,
179
+ HOST_BIG_ENDIAN ? a : b);
180
+ } else {
181
+ ret = load_atom_16(env, ra, l.page[0].haddr, l.memop);
182
+ }
183
+ if (l.memop & MO_BSWAP) {
184
+ ret = bswap128(ret);
185
+ }
186
+ return ret;
187
+ }
188
+
189
+ first = l.page[0].size;
190
+ if (first == 8) {
191
+ MemOp mop8 = (l.memop & ~MO_SIZE) | MO_64;
192
+
193
+ a = do_ld_8(env, &l.page[0], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
194
+ b = do_ld_8(env, &l.page[1], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
195
+ if ((mop8 & MO_BSWAP) == MO_LE) {
196
+ ret = int128_make128(a, b);
197
+ } else {
198
+ ret = int128_make128(b, a);
199
+ }
200
+ return ret;
201
+ }
202
+
203
+ if (first < 8) {
204
+ a = do_ld_beN(env, &l.page[0], 0, l.mmu_idx,
205
+ MMU_DATA_LOAD, l.memop, ra);
206
+ ret = do_ld16_beN(env, &l.page[1], a, l.mmu_idx, l.memop, ra);
207
+ } else {
208
+ ret = do_ld16_beN(env, &l.page[0], 0, l.mmu_idx, l.memop, ra);
209
+ b = int128_getlo(ret);
210
+ ret = int128_lshift(ret, l.page[1].size * 8);
211
+ a = int128_gethi(ret);
212
+ b = do_ld_beN(env, &l.page[1], b, l.mmu_idx,
213
+ MMU_DATA_LOAD, l.memop, ra);
214
+ ret = int128_make128(b, a);
215
+ }
216
+ if ((l.memop & MO_BSWAP) == MO_LE) {
217
+ ret = bswap128(ret);
218
+ }
219
+ return ret;
220
+}
221
+
222
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
223
+ uint32_t oi, uintptr_t retaddr)
224
+{
225
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
226
+ return do_ld16_mmu(env, addr, oi, retaddr);
227
+}
228
+
229
+Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, uint32_t oi)
230
+{
231
+ return helper_ld16_mmu(env, addr, oi, GETPC());
232
+}
233
+
234
/*
235
* Load helpers for cpu_ldst.h.
236
*/
237
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
238
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
239
MemOpIdx oi, uintptr_t ra)
240
{
241
- MemOp mop = get_memop(oi);
242
- int mmu_idx = get_mmuidx(oi);
243
- MemOpIdx new_oi;
244
- unsigned a_bits;
245
- uint64_t h, l;
246
+ Int128 ret;
247
248
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
249
- a_bits = get_alignment_bits(mop);
250
-
251
- /* Handle CPU specific unaligned behaviour */
252
- if (addr & ((1 << a_bits) - 1)) {
253
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
254
- mmu_idx, ra);
255
- }
256
-
257
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
258
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
259
- new_oi = make_memop_idx(mop, mmu_idx);
260
-
261
- h = helper_ldq_mmu(env, addr, new_oi, ra);
262
- l = helper_ldq_mmu(env, addr + 8, new_oi, ra);
263
-
264
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
265
- return int128_make128(l, h);
266
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
267
+ ret = do_ld16_mmu(env, addr, oi, ra);
268
+ plugin_load_cb(env, addr, oi);
269
+ return ret;
270
}
271
272
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
273
MemOpIdx oi, uintptr_t ra)
274
{
275
- MemOp mop = get_memop(oi);
276
- int mmu_idx = get_mmuidx(oi);
277
- MemOpIdx new_oi;
278
- unsigned a_bits;
279
- uint64_t h, l;
280
+ Int128 ret;
281
282
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
283
- a_bits = get_alignment_bits(mop);
284
-
285
- /* Handle CPU specific unaligned behaviour */
286
- if (addr & ((1 << a_bits) - 1)) {
287
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
288
- mmu_idx, ra);
289
- }
290
-
291
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
292
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
293
- new_oi = make_memop_idx(mop, mmu_idx);
294
-
295
- l = helper_ldq_mmu(env, addr, new_oi, ra);
296
- h = helper_ldq_mmu(env, addr + 8, new_oi, ra);
297
-
298
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
299
- return int128_make128(l, h);
300
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
301
+ ret = do_ld16_mmu(env, addr, oi, ra);
302
+ plugin_load_cb(env, addr, oi);
303
+ return ret;
304
}
305
306
/*
307
@@ -XXX,XX +XXX,XX @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
308
}
309
}
310
311
+/*
312
+ * Wrapper for the above, for 8 < size < 16.
313
+ */
314
+static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
315
+ Int128 val_le, int mmu_idx,
316
+ MemOp mop, uintptr_t ra)
317
+{
318
+ int size = p->size;
319
+ MemOp atom;
320
+
321
+ if (unlikely(p->flags & TLB_MMIO)) {
322
+ p->size = 8;
323
+ do_st_mmio_leN(env, p, int128_getlo(val_le), mmu_idx, ra);
324
+ p->size = size - 8;
325
+ p->addr += 8;
326
+ return do_st_mmio_leN(env, p, int128_gethi(val_le), mmu_idx, ra);
327
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
328
+ return int128_gethi(val_le) >> ((size - 8) * 8);
329
+ }
330
+
331
+ /*
332
+ * It is a given that we cross a page and therefore there is no atomicity
333
+ * for the store as a whole, but subobjects may need attention.
334
+ */
335
+ atom = mop & MO_ATOM_MASK;
336
+ switch (atom) {
337
+ case MO_ATOM_SUBALIGN:
338
+ store_parts_leN(p->haddr, 8, int128_getlo(val_le));
339
+ return store_parts_leN(p->haddr + 8, p->size - 8,
340
+ int128_gethi(val_le));
341
+
342
+ case MO_ATOM_WITHIN16_PAIR:
343
+ /* Since size > 8, this is the half that must be atomic. */
344
+ if (!HAVE_al16) {
345
+ cpu_loop_exit_atomic(env_cpu(env), ra);
346
+ }
347
+ return store_whole_le16(p->haddr, p->size, val_le);
348
+
349
+ case MO_ATOM_IFALIGN_PAIR:
350
+ /*
351
+ * Since size > 8, both halves are misaligned,
352
+ * and so neither is atomic.
353
+ */
354
+ case MO_ATOM_IFALIGN:
355
+ case MO_ATOM_NONE:
356
+ stq_le_p(p->haddr, int128_getlo(val_le));
357
+ return store_bytes_leN(p->haddr + 8, p->size - 8,
358
+ int128_gethi(val_le));
359
+
360
+ default:
361
+ g_assert_not_reached();
362
+ }
363
+}
364
+
365
static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
366
int mmu_idx, uintptr_t ra)
367
{
368
@@ -XXX,XX +XXX,XX @@ void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
369
do_st8_mmu(env, addr, val, oi, retaddr);
370
}
371
372
+static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
373
+ MemOpIdx oi, uintptr_t ra)
374
+{
375
+ MMULookupLocals l;
376
+ bool crosspage;
377
+ uint64_t a, b;
378
+ int first;
379
+
380
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
381
+ if (likely(!crosspage)) {
382
+ /* Swap to host endian if necessary, then store. */
383
+ if (l.memop & MO_BSWAP) {
384
+ val = bswap128(val);
385
+ }
386
+ if (unlikely(l.page[0].flags & TLB_MMIO)) {
387
+ QEMU_IOTHREAD_LOCK_GUARD();
388
+ if (HOST_BIG_ENDIAN) {
389
+ b = int128_getlo(val), a = int128_gethi(val);
390
+ } else {
391
+ a = int128_getlo(val), b = int128_gethi(val);
392
+ }
393
+ io_writex(env, l.page[0].full, l.mmu_idx, a, addr, ra, MO_64);
394
+ io_writex(env, l.page[0].full, l.mmu_idx, b, addr + 8, ra, MO_64);
395
+ } else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
396
+ /* nothing */
397
+ } else {
398
+ store_atom_16(env, ra, l.page[0].haddr, l.memop, val);
399
+ }
400
+ return;
401
+ }
402
+
403
+ first = l.page[0].size;
404
+ if (first == 8) {
405
+ MemOp mop8 = (l.memop & ~(MO_SIZE | MO_BSWAP)) | MO_64;
406
+
407
+ if (l.memop & MO_BSWAP) {
408
+ val = bswap128(val);
409
+ }
410
+ if (HOST_BIG_ENDIAN) {
411
+ b = int128_getlo(val), a = int128_gethi(val);
412
+ } else {
413
+ a = int128_getlo(val), b = int128_gethi(val);
414
+ }
415
+ do_st_8(env, &l.page[0], a, l.mmu_idx, mop8, ra);
416
+ do_st_8(env, &l.page[1], b, l.mmu_idx, mop8, ra);
417
+ return;
418
+ }
419
+
420
+ if ((l.memop & MO_BSWAP) != MO_LE) {
421
+ val = bswap128(val);
422
+ }
423
+ if (first < 8) {
424
+ do_st_leN(env, &l.page[0], int128_getlo(val), l.mmu_idx, l.memop, ra);
425
+ val = int128_urshift(val, first * 8);
426
+ do_st16_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
427
+ } else {
428
+ b = do_st16_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
429
+ do_st_leN(env, &l.page[1], b, l.mmu_idx, l.memop, ra);
430
+ }
431
+}
432
+
433
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
434
+ MemOpIdx oi, uintptr_t retaddr)
435
+{
436
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
437
+ do_st16_mmu(env, addr, val, oi, retaddr);
438
+}
439
+
440
+void helper_st_i128(CPUArchState *env, target_ulong addr, Int128 val,
441
+ MemOpIdx oi)
442
+{
443
+ helper_st16_mmu(env, addr, val, oi, GETPC());
444
+}
445
+
446
/*
447
* Store Helpers for cpu_ldst.h
448
*/
449
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
450
plugin_store_cb(env, addr, oi);
451
}
452
453
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
454
- MemOpIdx oi, uintptr_t ra)
455
+void cpu_st16_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
456
+ MemOpIdx oi, uintptr_t retaddr)
457
{
458
- MemOp mop = get_memop(oi);
459
- int mmu_idx = get_mmuidx(oi);
460
- MemOpIdx new_oi;
461
- unsigned a_bits;
462
-
463
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
464
- a_bits = get_alignment_bits(mop);
465
-
466
- /* Handle CPU specific unaligned behaviour */
467
- if (addr & ((1 << a_bits) - 1)) {
468
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
469
- mmu_idx, ra);
470
- }
471
-
472
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
473
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
474
- new_oi = make_memop_idx(mop, mmu_idx);
475
-
476
- helper_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
477
- helper_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
478
-
479
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
480
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
481
+ do_st16_mmu(env, addr, val, oi, retaddr);
482
+ plugin_store_cb(env, addr, oi);
483
}
484
485
-void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
486
- MemOpIdx oi, uintptr_t ra)
487
+void cpu_st16_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
488
+ MemOpIdx oi, uintptr_t retaddr)
489
{
490
- MemOp mop = get_memop(oi);
491
- int mmu_idx = get_mmuidx(oi);
492
- MemOpIdx new_oi;
493
- unsigned a_bits;
494
-
495
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
496
- a_bits = get_alignment_bits(mop);
497
-
498
- /* Handle CPU specific unaligned behaviour */
499
- if (addr & ((1 << a_bits) - 1)) {
500
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
501
- mmu_idx, ra);
502
- }
503
-
504
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
505
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
506
- new_oi = make_memop_idx(mop, mmu_idx);
507
-
508
- helper_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
509
- helper_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
510
-
511
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
512
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
513
+ do_st16_mmu(env, addr, val, oi, retaddr);
514
+ plugin_store_cb(env, addr, oi);
515
}
516
517
#include "ldst_common.c.inc"
518
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
519
index XXXXXXX..XXXXXXX 100644
520
--- a/accel/tcg/user-exec.c
521
+++ b/accel/tcg/user-exec.c
522
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
523
return cpu_to_le64(ret);
524
}
525
526
-Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
527
- MemOpIdx oi, uintptr_t ra)
528
+static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
529
+ MemOp mop, uintptr_t ra)
530
{
531
void *haddr;
532
Int128 ret;
533
534
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
535
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
536
- memcpy(&ret, haddr, 16);
537
+ tcg_debug_assert((mop & MO_SIZE) == MO_128);
538
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
539
+ ret = load_atom_16(env, ra, haddr, mop);
540
clear_helper_retaddr();
541
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
542
+ return ret;
543
+}
544
545
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
546
+ MemOpIdx oi, uintptr_t ra)
547
+{
548
+ MemOp mop = get_memop(oi);
549
+ Int128 ret = do_ld16_he_mmu(env, addr, mop, ra);
550
+
551
+ if (mop & MO_BSWAP) {
552
+ ret = bswap128(ret);
553
+ }
554
+ return ret;
555
+}
556
+
557
+Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, MemOpIdx oi)
558
+{
559
+ return helper_ld16_mmu(env, addr, oi, GETPC());
560
+}
561
+
562
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
563
+ MemOpIdx oi, uintptr_t ra)
564
+{
565
+ MemOp mop = get_memop(oi);
566
+ Int128 ret;
567
+
568
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
569
+ ret = do_ld16_he_mmu(env, addr, mop, ra);
570
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
571
if (!HOST_BIG_ENDIAN) {
572
ret = bswap128(ret);
573
}
574
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
575
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
576
MemOpIdx oi, uintptr_t ra)
577
{
578
- void *haddr;
579
+ MemOp mop = get_memop(oi);
580
Int128 ret;
581
582
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
583
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
584
- memcpy(&ret, haddr, 16);
585
- clear_helper_retaddr();
586
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
587
+ ret = do_ld16_he_mmu(env, addr, mop, ra);
588
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
589
-
590
if (HOST_BIG_ENDIAN) {
591
ret = bswap128(ret);
592
}
593
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
594
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
595
}
596
597
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
598
- Int128 val, MemOpIdx oi, uintptr_t ra)
599
+static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
600
+ MemOp mop, uintptr_t ra)
601
{
602
void *haddr;
603
604
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
605
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
606
+ tcg_debug_assert((mop & MO_SIZE) == MO_128);
607
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
608
+ store_atom_16(env, ra, haddr, mop, val);
609
+ clear_helper_retaddr();
610
+}
611
+
612
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
613
+ MemOpIdx oi, uintptr_t ra)
614
+{
615
+ MemOp mop = get_memop(oi);
616
+
617
+ if (mop & MO_BSWAP) {
618
+ val = bswap128(val);
619
+ }
620
+ do_st16_he_mmu(env, addr, val, mop, ra);
621
+}
622
+
623
+void helper_st_i128(CPUArchState *env, target_ulong addr,
624
+ Int128 val, MemOpIdx oi)
625
+{
626
+ helper_st16_mmu(env, addr, val, oi, GETPC());
627
+}
628
+
629
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
630
+ Int128 val, MemOpIdx oi, uintptr_t ra)
631
+{
632
+ MemOp mop = get_memop(oi);
633
+
634
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
635
if (!HOST_BIG_ENDIAN) {
636
val = bswap128(val);
637
}
638
- memcpy(haddr, &val, 16);
639
- clear_helper_retaddr();
640
+ do_st16_he_mmu(env, addr, val, mop, ra);
641
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
642
}
643
644
void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
645
Int128 val, MemOpIdx oi, uintptr_t ra)
646
{
647
- void *haddr;
648
+ MemOp mop = get_memop(oi);
649
650
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
651
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
652
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
653
if (HOST_BIG_ENDIAN) {
654
val = bswap128(val);
655
}
656
- memcpy(haddr, &val, 16);
657
- clear_helper_retaddr();
658
+ do_st16_he_mmu(env, addr, val, mop, ra);
659
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
660
}
661
662
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
663
index XXXXXXX..XXXXXXX 100644
664
--- a/tcg/tcg-op.c
665
+++ b/tcg/tcg-op.c
666
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
667
}
668
}
669
670
+/*
671
+ * Return true if @mop, without knowledge of the pointer alignment,
672
+ * does not require 16-byte atomicity, and it would be advantageous
673
+ * to avoid a call to a helper function.
674
+ */
675
+static bool use_two_i64_for_i128(MemOp mop)
676
+{
677
+#ifdef CONFIG_SOFTMMU
678
+ /* Two softmmu tlb lookups is larger than one function call. */
679
+ return false;
680
+#else
681
+ /*
682
+ * For user-only, two 64-bit operations may well be smaller than a call.
683
+ * Determine if that would be legal for the requested atomicity.
684
+ */
685
+ switch (mop & MO_ATOM_MASK) {
686
+ case MO_ATOM_NONE:
687
+ case MO_ATOM_IFALIGN_PAIR:
688
+ return true;
689
+ case MO_ATOM_IFALIGN:
690
+ case MO_ATOM_SUBALIGN:
691
+ case MO_ATOM_WITHIN16:
692
+ case MO_ATOM_WITHIN16_PAIR:
693
+ /* In a serialized context, no atomicity is required. */
694
+ return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
695
+ default:
696
+ g_assert_not_reached();
697
+ }
698
+#endif
699
+}
700
+
701
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
702
{
703
MemOp mop_1 = orig, mop_2;
704
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
705
ret[1] = mop_2;
706
}
707
708
+#if TARGET_LONG_BITS == 64
709
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
710
+#else
711
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
712
+#endif
713
+
714
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
715
{
716
- MemOp mop[2];
717
- TCGv addr_p8;
718
- TCGv_i64 x, y;
719
+ MemOpIdx oi = make_memop_idx(memop, idx);
720
721
- canonicalize_memop_i128_as_i64(mop, memop);
722
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
723
+ tcg_debug_assert((memop & MO_SIGN) == 0);
724
725
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
726
addr = plugin_prep_mem_callbacks(addr);
727
728
- /* TODO: respect atomicity of the operation. */
729
/* TODO: allow the tcg backend to see the whole operation. */
730
731
- /*
732
- * Since there are no global TCGv_i128, there is no visible state
733
- * changed if the second load faults. Load directly into the two
734
- * subwords.
735
- */
736
- if ((memop & MO_BSWAP) == MO_LE) {
737
- x = TCGV128_LOW(val);
738
- y = TCGV128_HIGH(val);
739
+ if (use_two_i64_for_i128(memop)) {
740
+ MemOp mop[2];
741
+ TCGv addr_p8;
742
+ TCGv_i64 x, y;
743
+
744
+ canonicalize_memop_i128_as_i64(mop, memop);
745
+
746
+ /*
747
+ * Since there are no global TCGv_i128, there is no visible state
748
+ * changed if the second load faults. Load directly into the two
749
+ * subwords.
750
+ */
751
+ if ((memop & MO_BSWAP) == MO_LE) {
752
+ x = TCGV128_LOW(val);
753
+ y = TCGV128_HIGH(val);
754
+ } else {
755
+ x = TCGV128_HIGH(val);
756
+ y = TCGV128_LOW(val);
757
+ }
758
+
759
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
760
+
761
+ if ((mop[0] ^ memop) & MO_BSWAP) {
762
+ tcg_gen_bswap64_i64(x, x);
763
+ }
764
+
765
+ addr_p8 = tcg_temp_ebb_new();
766
+ tcg_gen_addi_tl(addr_p8, addr, 8);
767
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
768
+ tcg_temp_free(addr_p8);
769
+
770
+ if ((mop[0] ^ memop) & MO_BSWAP) {
771
+ tcg_gen_bswap64_i64(y, y);
772
+ }
773
} else {
774
- x = TCGV128_HIGH(val);
775
- y = TCGV128_LOW(val);
776
+ gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
777
}
778
779
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
780
-
781
- if ((mop[0] ^ memop) & MO_BSWAP) {
782
- tcg_gen_bswap64_i64(x, x);
783
- }
784
-
785
- addr_p8 = tcg_temp_new();
786
- tcg_gen_addi_tl(addr_p8, addr, 8);
787
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
788
- tcg_temp_free(addr_p8);
789
-
790
- if ((mop[0] ^ memop) & MO_BSWAP) {
791
- tcg_gen_bswap64_i64(y, y);
792
- }
793
-
794
- plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
795
- QEMU_PLUGIN_MEM_R);
796
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
797
}
798
799
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
800
{
801
- MemOp mop[2];
802
- TCGv addr_p8;
803
- TCGv_i64 x, y;
804
+ MemOpIdx oi = make_memop_idx(memop, idx);
805
806
- canonicalize_memop_i128_as_i64(mop, memop);
807
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
808
+ tcg_debug_assert((memop & MO_SIGN) == 0);
809
810
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
811
addr = plugin_prep_mem_callbacks(addr);
812
813
- /* TODO: respect atomicity of the operation. */
814
/* TODO: allow the tcg backend to see the whole operation. */
815
816
- if ((memop & MO_BSWAP) == MO_LE) {
817
- x = TCGV128_LOW(val);
818
- y = TCGV128_HIGH(val);
819
+ if (use_two_i64_for_i128(memop)) {
820
+ MemOp mop[2];
821
+ TCGv addr_p8;
822
+ TCGv_i64 x, y;
823
+
824
+ canonicalize_memop_i128_as_i64(mop, memop);
825
+
826
+ if ((memop & MO_BSWAP) == MO_LE) {
827
+ x = TCGV128_LOW(val);
828
+ y = TCGV128_HIGH(val);
829
+ } else {
830
+ x = TCGV128_HIGH(val);
831
+ y = TCGV128_LOW(val);
832
+ }
833
+
834
+ addr_p8 = tcg_temp_ebb_new();
835
+ if ((mop[0] ^ memop) & MO_BSWAP) {
836
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
837
+
838
+ tcg_gen_bswap64_i64(t, x);
839
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
840
+ tcg_gen_bswap64_i64(t, y);
841
+ tcg_gen_addi_tl(addr_p8, addr, 8);
842
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
843
+ tcg_temp_free_i64(t);
844
+ } else {
845
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
846
+ tcg_gen_addi_tl(addr_p8, addr, 8);
847
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
848
+ }
849
+ tcg_temp_free(addr_p8);
850
} else {
851
- x = TCGV128_HIGH(val);
852
- y = TCGV128_LOW(val);
853
+ gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
854
}
855
856
- addr_p8 = tcg_temp_new();
857
- if ((mop[0] ^ memop) & MO_BSWAP) {
858
- TCGv_i64 t = tcg_temp_ebb_new_i64();
859
-
860
- tcg_gen_bswap64_i64(t, x);
861
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
862
- tcg_gen_bswap64_i64(t, y);
863
- tcg_gen_addi_tl(addr_p8, addr, 8);
864
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
865
- tcg_temp_free_i64(t);
866
- } else {
867
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
868
- tcg_gen_addi_tl(addr_p8, addr, 8);
869
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
870
- }
871
- tcg_temp_free(addr_p8);
872
-
873
- plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
874
- QEMU_PLUGIN_MEM_W);
875
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
876
}
877
878
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
879
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
880
index XXXXXXX..XXXXXXX 100644
881
--- a/accel/tcg/ldst_atomicity.c.inc
882
+++ b/accel/tcg/ldst_atomicity.c.inc
883
@@ -XXX,XX +XXX,XX @@ static inline uint64_t load_atom_8_by_4(void *pv)
884
}
885
}
886
887
+/**
888
+ * load_atom_8_by_8_or_4:
889
+ * @pv: host address
890
+ *
891
+ * Load 8 bytes from aligned @pv, with at least 4-byte atomicity.
892
+ */
893
+static inline uint64_t load_atom_8_by_8_or_4(void *pv)
894
+{
895
+ if (HAVE_al8_fast) {
896
+ return load_atomic8(pv);
897
+ } else {
898
+ return load_atom_8_by_4(pv);
899
+ }
900
+}
901
+
902
/**
903
* load_atom_2:
904
* @p: host address
905
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
906
}
907
}
908
909
+/**
910
+ * load_atom_16:
911
+ * @p: host address
912
+ * @memop: the full memory op
913
+ *
914
+ * Load 16 bytes from @p, honoring the atomicity of @memop.
915
+ */
916
+static Int128 load_atom_16(CPUArchState *env, uintptr_t ra,
917
+ void *pv, MemOp memop)
918
+{
919
+ uintptr_t pi = (uintptr_t)pv;
920
+ int atmax;
921
+ Int128 r;
922
+ uint64_t a, b;
923
+
924
+ /*
925
+ * If the host does not support 16-byte atomics, wait until we have
926
+ * examined the atomicity parameters below.
927
+ */
928
+ if (HAVE_al16_fast && likely((pi & 15) == 0)) {
929
+ return load_atomic16(pv);
930
+ }
931
+
932
+ atmax = required_atomicity(env, pi, memop);
933
+ switch (atmax) {
934
+ case MO_8:
935
+ memcpy(&r, pv, 16);
936
+ return r;
937
+ case MO_16:
938
+ a = load_atom_8_by_2(pv);
939
+ b = load_atom_8_by_2(pv + 8);
940
+ break;
941
+ case MO_32:
942
+ a = load_atom_8_by_4(pv);
943
+ b = load_atom_8_by_4(pv + 8);
944
+ break;
945
+ case MO_64:
946
+ if (!HAVE_al8) {
947
+ cpu_loop_exit_atomic(env_cpu(env), ra);
948
+ }
949
+ a = load_atomic8(pv);
950
+ b = load_atomic8(pv + 8);
951
+ break;
952
+ case -MO_64:
953
+ if (!HAVE_al8) {
954
+ cpu_loop_exit_atomic(env_cpu(env), ra);
955
+ }
956
+ a = load_atom_extract_al8x2(pv);
957
+ b = load_atom_extract_al8x2(pv + 8);
958
+ break;
959
+ case MO_128:
960
+ return load_atomic16_or_exit(env, ra, pv);
961
+ default:
962
+ g_assert_not_reached();
963
+ }
964
+ return int128_make128(HOST_BIG_ENDIAN ? b : a, HOST_BIG_ENDIAN ? a : b);
965
+}
966
+
967
/**
968
* store_atomic2:
969
* @pv: host address
970
@@ -XXX,XX +XXX,XX @@ static inline void store_atomic8(void *pv, uint64_t val)
971
qatomic_set__nocheck(p, val);
972
}
973
974
+/**
975
+ * store_atomic16:
976
+ * @pv: host address
977
+ * @val: value to store
978
+ *
979
+ * Atomically store 16 aligned bytes to @pv.
980
+ */
981
+static inline void store_atomic16(void *pv, Int128Alias val)
982
+{
983
+#if defined(CONFIG_ATOMIC128)
984
+ __uint128_t *pu = __builtin_assume_aligned(pv, 16);
985
+ qatomic_set__nocheck(pu, val.u);
986
+#elif defined(CONFIG_CMPXCHG128)
987
+ __uint128_t *pu = __builtin_assume_aligned(pv, 16);
988
+ __uint128_t o;
989
+
990
+ /*
991
+ * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
992
+ * defer to libatomic, so we must use __sync_*_compare_and_swap_16
993
+ * and accept the sequential consistency that comes with it.
994
+ */
995
+ do {
996
+ o = *pu;
997
+ } while (!__sync_bool_compare_and_swap_16(pu, o, val.u));
998
+#else
999
+ qemu_build_not_reached();
1000
+#endif
1001
+}
1002
+
1003
/**
1004
* store_atom_4x2
1005
*/
1006
@@ -XXX,XX +XXX,XX @@ static void store_atom_8(CPUArchState *env, uintptr_t ra,
1007
}
1008
cpu_loop_exit_atomic(env_cpu(env), ra);
1009
}
1010
+
1011
+/**
1012
+ * store_atom_16:
1013
+ * @p: host address
1014
+ * @val: the value to store
1015
+ * @memop: the full memory op
1016
+ *
1017
+ * Store 16 bytes to @p, honoring the atomicity of @memop.
1018
+ */
1019
+static void store_atom_16(CPUArchState *env, uintptr_t ra,
1020
+ void *pv, MemOp memop, Int128 val)
1021
+{
1022
+ uintptr_t pi = (uintptr_t)pv;
1023
+ uint64_t a, b;
1024
+ int atmax;
1025
+
1026
+ if (HAVE_al16_fast && likely((pi & 15) == 0)) {
1027
+ store_atomic16(pv, val);
1028
+ return;
1029
+ }
1030
+
1031
+ atmax = required_atomicity(env, pi, memop);
1032
+
1033
+ a = HOST_BIG_ENDIAN ? int128_gethi(val) : int128_getlo(val);
1034
+ b = HOST_BIG_ENDIAN ? int128_getlo(val) : int128_gethi(val);
1035
+ switch (atmax) {
1036
+ case MO_8:
1037
+ memcpy(pv, &val, 16);
1038
+ return;
1039
+ case MO_16:
1040
+ store_atom_8_by_2(pv, a);
1041
+ store_atom_8_by_2(pv + 8, b);
1042
+ return;
1043
+ case MO_32:
1044
+ store_atom_8_by_4(pv, a);
1045
+ store_atom_8_by_4(pv + 8, b);
1046
+ return;
1047
+ case MO_64:
1048
+ if (HAVE_al8) {
1049
+ store_atomic8(pv, a);
1050
+ store_atomic8(pv + 8, b);
1051
+ return;
1052
+ }
1053
+ break;
1054
+ case -MO_64:
1055
+ if (HAVE_al16) {
1056
+ uint64_t val_le;
1057
+ int s2 = pi & 15;
1058
+ int s1 = 16 - s2;
1059
+
1060
+ if (HOST_BIG_ENDIAN) {
1061
+ val = bswap128(val);
1062
+ }
1063
+ switch (s2) {
1064
+ case 1 ... 7:
1065
+ val_le = store_whole_le16(pv, s1, val);
1066
+ store_bytes_leN(pv + s1, s2, val_le);
1067
+ break;
1068
+ case 9 ... 15:
1069
+ store_bytes_leN(pv, s1, int128_getlo(val));
1070
+ val = int128_urshift(val, s1 * 8);
1071
+ store_whole_le16(pv + s1, s2, val);
1072
+ break;
1073
+ case 0: /* aligned */
1074
+ case 8: /* atmax MO_64 */
1075
+ default:
1076
+ g_assert_not_reached();
1077
+ }
1078
+ return;
1079
+ }
1080
+ break;
1081
+ case MO_128:
1082
+ if (HAVE_al16) {
1083
+ store_atomic16(pv, val);
1084
+ return;
1085
+ }
1086
+ break;
1087
+ default:
1088
+ g_assert_not_reached();
1089
+ }
1090
+ cpu_loop_exit_atomic(env_cpu(env), ra);
1091
+}
1092
--
1093
2.34.1
New patch
1
There is an edge condition prior to gcc13 for which optimization
2
is required to generate 16-byte atomic sequences. Detect this.
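As a rough aside (not part of the patch): the workaround probed for here amounts to forcing both 16-byte pointer alignment and a per-function optimization level, so that GCC prior to 13 inlines the 16-byte atomic access instead of deferring to libatomic (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389). The function name below is invented for illustration only.

    /* Sketch: inline 16-byte atomic load, assuming host support. */
    __attribute__((optimize("O1")))
    static inline unsigned __int128 example_load_atomic16(void *pv)
    {
        /* Promise 16-byte alignment, which may exceed __alignof(__int128). */
        unsigned __int128 *p = __builtin_assume_aligned(pv, 16);
        return __atomic_load_n(p, __ATOMIC_RELAXED);
    }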
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
meson.build | 52 ++++++++++++++++++++++------------
8
accel/tcg/ldst_atomicity.c.inc | 29 ++++++++++++++++---
9
2 files changed, 59 insertions(+), 22 deletions(-)
10
11
diff --git a/meson.build b/meson.build
12
index XXXXXXX..XXXXXXX 100644
13
--- a/meson.build
14
+++ b/meson.build
15
@@ -XXX,XX +XXX,XX @@ config_host_data.set('HAVE_BROKEN_SIZE_MAX', not cc.compiles('''
16
return printf("%zu", SIZE_MAX);
17
}''', args: ['-Werror']))
18
19
-atomic_test = '''
20
+# See if 64-bit atomic operations are supported.
21
+# Note that without __atomic builtins, we can only
22
+# assume atomic loads/stores max at pointer size.
23
+config_host_data.set('CONFIG_ATOMIC64', cc.links('''
24
#include <stdint.h>
25
int main(void)
26
{
27
- @0@ x = 0, y = 0;
28
+ uint64_t x = 0, y = 0;
29
y = __atomic_load_n(&x, __ATOMIC_RELAXED);
30
__atomic_store_n(&x, y, __ATOMIC_RELAXED);
31
__atomic_compare_exchange_n(&x, &y, x, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
32
__atomic_exchange_n(&x, y, __ATOMIC_RELAXED);
33
__atomic_fetch_add(&x, y, __ATOMIC_RELAXED);
34
return 0;
35
- }'''
36
-
37
-# See if 64-bit atomic operations are supported.
38
-# Note that without __atomic builtins, we can only
39
-# assume atomic loads/stores max at pointer size.
40
-config_host_data.set('CONFIG_ATOMIC64', cc.links(atomic_test.format('uint64_t')))
41
+ }'''))
42
43
has_int128 = cc.links('''
44
__int128_t a;
45
@@ -XXX,XX +XXX,XX @@ if has_int128
46
# "do we have 128-bit atomics which are handled inline and specifically not
47
# via libatomic". The reason we can't use libatomic is documented in the
48
# comment starting "GCC is a house divided" in include/qemu/atomic128.h.
49
- has_atomic128 = cc.links(atomic_test.format('unsigned __int128'))
50
+ # We only care about these operations on 16-byte aligned pointers, so
51
+ # force 16-byte alignment of the pointer, which may be greater than
52
+ # __alignof(unsigned __int128) for the host.
53
+ atomic_test_128 = '''
54
+ int main(int ac, char **av) {
55
+ unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], sizeof(16));
56
+ p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
57
+ __atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
58
+ __atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
59
+ return 0;
60
+ }'''
61
+ has_atomic128 = cc.links(atomic_test_128)
62
63
config_host_data.set('CONFIG_ATOMIC128', has_atomic128)
64
65
if not has_atomic128
66
- has_cmpxchg128 = cc.links('''
67
- int main(void)
68
- {
69
- unsigned __int128 x = 0, y = 0;
70
- __sync_val_compare_and_swap_16(&x, y, x);
71
- return 0;
72
- }
73
- ''')
74
+ # Even with __builtin_assume_aligned, the above test may have failed
75
+ # without optimization enabled. Try again with optimizations locally
76
+ # enabled for the function. See
77
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
78
+ has_atomic128_opt = cc.links('__attribute__((optimize("O1")))' + atomic_test_128)
79
+ config_host_data.set('CONFIG_ATOMIC128_OPT', has_atomic128_opt)
80
81
- config_host_data.set('CONFIG_CMPXCHG128', has_cmpxchg128)
82
+ if not has_atomic128_opt
83
+ config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
84
+ int main(void)
85
+ {
86
+ unsigned __int128 x = 0, y = 0;
87
+ __sync_val_compare_and_swap_16(&x, y, x);
88
+ return 0;
89
+ }
90
+ '''))
91
+ endif
92
endif
93
endif
94
95
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
96
index XXXXXXX..XXXXXXX 100644
97
--- a/accel/tcg/ldst_atomicity.c.inc
98
+++ b/accel/tcg/ldst_atomicity.c.inc
99
@@ -XXX,XX +XXX,XX @@
100
#endif
101
#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
102
103
+/*
104
+ * If __alignof(unsigned __int128) < 16, GCC may refuse to inline atomics
105
+ * that are supported by the host, e.g. s390x. We can force the pointer to
106
+ * have our known alignment with __builtin_assume_aligned, however prior to
107
+ * GCC 13 that was only reliable with optimization enabled. See
108
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
109
+ */
110
+#if defined(CONFIG_ATOMIC128_OPT)
111
+# if !defined(__OPTIMIZE__)
112
+# define ATTRIBUTE_ATOMIC128_OPT __attribute__((optimize("O1")))
113
+# endif
114
+# define CONFIG_ATOMIC128
115
+#endif
116
+#ifndef ATTRIBUTE_ATOMIC128_OPT
117
+# define ATTRIBUTE_ATOMIC128_OPT
118
+#endif
119
+
120
#if defined(CONFIG_ATOMIC128)
121
# define HAVE_al16_fast true
122
#else
123
@@ -XXX,XX +XXX,XX @@ static inline uint64_t load_atomic8(void *pv)
124
*
125
* Atomically load 16 aligned bytes from @pv.
126
*/
127
-static inline Int128 load_atomic16(void *pv)
128
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
129
+load_atomic16(void *pv)
130
{
131
#ifdef CONFIG_ATOMIC128
132
__uint128_t *p = __builtin_assume_aligned(pv, 16);
133
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
134
* cross a 16-byte boundary then the access must be 16-byte atomic,
135
* otherwise the access must be 8-byte atomic.
136
*/
137
-static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
138
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
139
+load_atom_extract_al16_or_al8(void *pv, int s)
140
{
141
#if defined(CONFIG_ATOMIC128)
142
uintptr_t pi = (uintptr_t)pv;
143
@@ -XXX,XX +XXX,XX @@ static inline void store_atomic8(void *pv, uint64_t val)
144
*
145
* Atomically store 16 aligned bytes to @pv.
146
*/
147
-static inline void store_atomic16(void *pv, Int128Alias val)
148
+static inline void ATTRIBUTE_ATOMIC128_OPT
149
+store_atomic16(void *pv, Int128Alias val)
150
{
151
#if defined(CONFIG_ATOMIC128)
152
__uint128_t *pu = __builtin_assume_aligned(pv, 16);
153
@@ -XXX,XX +XXX,XX @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
154
*
155
* Atomically store @val to @p masked by @msk.
156
*/
157
-static void store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
158
+static void ATTRIBUTE_ATOMIC128_OPT
159
+store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
160
{
161
#if defined(CONFIG_ATOMIC128)
162
__uint128_t *pu, old, new;
163
--
164
2.34.1
1
By choosing "tcg:kvm" when kvm is not enabled, we generate
1
Notice when Intel or AMD have guaranteed that vmovdqa is atomic.
2
an incorrect warning: "invalid accelerator kvm".
2
The new variable will also be used in generated code.
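As an illustrative sketch only (the helper name is invented; the real check lives in tcg_target_init in the diff below), assuming GCC/Clang's <cpuid.h>: AVX-capable Intel and AMD CPUs guarantee atomicity of aligned 16-byte vector loads and stores, so the vendor signature from CPUID leaf 0 is enough to set the flag.

    #include <cpuid.h>
    #include <stdbool.h>

    static bool example_have_atomic16(bool have_avx1)
    {
        unsigned a, b, c, d;

        if (!have_avx1 || !__get_cpuid(0, &a, &b, &c, &d)) {
            return false;
        }
        /* Vendor from leaf 0: "GenuineIntel" or "AuthenticAMD". */
        return c == signature_INTEL_ecx || c == signature_AMD_ecx;
    }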
3
3
4
At the same time, use g_str_has_suffix rather than open-coding
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
the same operation.
6
7
Presumably the inverse is also true with --disable-tcg.
8
9
Fixes: 28a0961757fc
10
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
11
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
12
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
---
6
---
15
vl.c | 21 +++++++++++++--------
7
include/qemu/cpuid.h | 18 ++++++++++++++++++
16
1 file changed, 13 insertions(+), 8 deletions(-)
8
tcg/i386/tcg-target.h | 1 +
9
tcg/i386/tcg-target.c.inc | 27 +++++++++++++++++++++++++++
10
3 files changed, 46 insertions(+)
17
11
18
diff --git a/vl.c b/vl.c
12
diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
19
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
20
--- a/vl.c
14
--- a/include/qemu/cpuid.h
21
+++ b/vl.c
15
+++ b/include/qemu/cpuid.h
22
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
16
@@ -XXX,XX +XXX,XX @@
23
17
#define bit_LZCNT (1 << 5)
24
if (accel == NULL) {
18
#endif
25
/* Select the default accelerator */
19
26
- if (!accel_find("tcg") && !accel_find("kvm")) {
20
+/*
27
- error_report("No accelerator selected and"
21
+ * Signatures for different CPU implementations as returned from Leaf 0.
28
- " no default accelerator available");
22
+ */
29
- exit(1);
30
- } else {
31
- int pnlen = strlen(progname);
32
- if (pnlen >= 3 && g_str_equal(&progname[pnlen - 3], "kvm")) {
33
+ bool have_tcg = accel_find("tcg");
34
+ bool have_kvm = accel_find("kvm");
35
+
23
+
36
+ if (have_tcg && have_kvm) {
24
+#ifndef signature_INTEL_ecx
37
+ if (g_str_has_suffix(progname, "kvm")) {
25
+/* "Genu" "ineI" "ntel" */
38
/* If the program name ends with "kvm", we prefer KVM */
26
+#define signature_INTEL_ebx 0x756e6547
39
accel = "kvm:tcg";
27
+#define signature_INTEL_edx 0x49656e69
40
} else {
28
+#define signature_INTEL_ecx 0x6c65746e
41
accel = "tcg:kvm";
29
+#endif
30
+
31
+#ifndef signature_AMD_ecx
32
+/* "Auth" "enti" "cAMD" */
33
+#define signature_AMD_ebx 0x68747541
34
+#define signature_AMD_edx 0x69746e65
35
+#define signature_AMD_ecx 0x444d4163
36
+#endif
37
+
38
static inline unsigned xgetbv_low(unsigned c)
39
{
40
unsigned a, d;
41
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/tcg/i386/tcg-target.h
44
+++ b/tcg/i386/tcg-target.h
45
@@ -XXX,XX +XXX,XX @@ extern bool have_avx512dq;
46
extern bool have_avx512vbmi2;
47
extern bool have_avx512vl;
48
extern bool have_movbe;
49
+extern bool have_atomic16;
50
51
/* optional instructions */
52
#define TCG_TARGET_HAS_div2_i32 1
53
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
54
index XXXXXXX..XXXXXXX 100644
55
--- a/tcg/i386/tcg-target.c.inc
56
+++ b/tcg/i386/tcg-target.c.inc
57
@@ -XXX,XX +XXX,XX @@ bool have_avx512dq;
58
bool have_avx512vbmi2;
59
bool have_avx512vl;
60
bool have_movbe;
61
+bool have_atomic16;
62
63
#ifdef CONFIG_CPUID_H
64
static bool have_bmi2;
65
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
66
have_avx512dq = (b7 & bit_AVX512DQ) != 0;
67
have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
42
}
68
}
43
+ } else if (have_kvm) {
69
+
44
+ accel = "kvm";
70
+ /*
45
+ } else if (have_tcg) {
71
+ * The Intel SDM has added:
46
+ accel = "tcg";
72
+ * Processors that enumerate support for Intel® AVX
47
+ } else {
73
+ * (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
48
+ error_report("No accelerator selected and"
74
+ * guarantee that the 16-byte memory operations performed
49
+ " no default accelerator available");
75
+ * by the following instructions will always be carried
50
+ exit(1);
76
+ * out atomically:
77
+ * - MOVAPD, MOVAPS, and MOVDQA.
78
+ * - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
79
+ * - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
80
+ * with EVEX.128 and k0 (masking disabled).
81
+ * Note that these instructions require the linear addresses
82
+ * of their memory operands to be 16-byte aligned.
83
+ *
84
+ * AMD has provided an even stronger guarantee that processors
85
+ * with AVX provide 16-byte atomicity for all cachable,
86
+ * naturally aligned single loads and stores, e.g. MOVDQU.
87
+ *
88
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
89
+ */
90
+ if (have_avx1) {
91
+ __cpuid(0, a, b, c, d);
92
+ have_atomic16 = (c == signature_INTEL_ecx ||
93
+ c == signature_AMD_ecx);
94
+ }
51
}
95
}
52
}
96
}
53
-
97
}
54
accel_list = g_strsplit(accel, ":", 0);
55
56
for (tmp = accel_list; *tmp; tmp++) {
57
--
98
--
58
2.20.1
99
2.34.1
59
100
60
101
New patch
1
Notice when the host has additional atomic instructions.
2
The new variables will also be used in generated code.
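A minimal sketch of the Linux detection described above, using plain getauxval() rather than QEMU's qemu_getauxval() wrapper; the function name is invented for the example, and an aarch64 Linux host is assumed.

    #include <stdbool.h>
    #include <sys/auxv.h>      /* getauxval */
    #include <asm/hwcap.h>     /* HWCAP_ATOMICS, HWCAP_USCAT */

    static void example_detect_lse(bool *lse, bool *lse2)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);

        *lse  = hwcap & HWCAP_ATOMICS;  /* FEAT_LSE: LDADD, CAS, SWP, ... */
        *lse2 = hwcap & HWCAP_USCAT;    /* FEAT_LSE2: unaligned single-copy atomicity */
    }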
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target.h | 3 +++
9
tcg/aarch64/tcg-target.c.inc | 12 ++++++++++++
10
2 files changed, 15 insertions(+)
11
12
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/aarch64/tcg-target.h
15
+++ b/tcg/aarch64/tcg-target.h
16
@@ -XXX,XX +XXX,XX @@ typedef enum {
17
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
18
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
19
20
+extern bool have_lse;
21
+extern bool have_lse2;
22
+
23
/* optional instructions */
24
#define TCG_TARGET_HAS_div_i32 1
25
#define TCG_TARGET_HAS_rem_i32 1
26
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
27
index XXXXXXX..XXXXXXX 100644
28
--- a/tcg/aarch64/tcg-target.c.inc
29
+++ b/tcg/aarch64/tcg-target.c.inc
30
@@ -XXX,XX +XXX,XX @@
31
#include "../tcg-ldst.c.inc"
32
#include "../tcg-pool.c.inc"
33
#include "qemu/bitops.h"
34
+#ifdef __linux__
35
+#include <asm/hwcap.h>
36
+#endif
37
38
/* We're going to re-use TCGType in setting of the SF bit, which controls
39
the size of the operation performed. If we know the values match, it
40
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
41
return TCG_REG_X0 + slot;
42
}
43
44
+bool have_lse;
45
+bool have_lse2;
46
+
47
#define TCG_REG_TMP TCG_REG_X30
48
#define TCG_VEC_TMP TCG_REG_V31
49
50
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
51
52
static void tcg_target_init(TCGContext *s)
53
{
54
+#ifdef __linux__
55
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
56
+ have_lse = hwcap & HWCAP_ATOMICS;
57
+ have_lse2 = hwcap & HWCAP_USCAT;
58
+#endif
59
+
60
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
61
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
62
tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
63
--
64
2.34.1
65
66
diff view generated by jsdifflib
New patch
1
These features are present for Apple M1.
1
2
3
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target.c.inc | 28 ++++++++++++++++++++++++++++
9
1 file changed, 28 insertions(+)
10
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@
16
#ifdef __linux__
17
#include <asm/hwcap.h>
18
#endif
19
+#ifdef CONFIG_DARWIN
20
+#include <sys/sysctl.h>
21
+#endif
22
23
/* We're going to re-use TCGType in setting of the SF bit, which controls
24
the size of the operation performed. If we know the values match, it
25
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
26
}
27
}
28
29
+#ifdef CONFIG_DARWIN
30
+static bool sysctl_for_bool(const char *name)
31
+{
32
+ int val = 0;
33
+ size_t len = sizeof(val);
34
+
35
+ if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
36
+ return val != 0;
37
+ }
38
+
39
+ /*
40
+ * We might in the future ask for properties not present in older kernels,
41
+ * but we're only asking about static properties, all of which should be
42
+ * 'int'. So we shouldn't see ENOMEM (val too small), or any of the other
43
+ * more exotic errors.
44
+ */
45
+ assert(errno == ENOENT);
46
+ return false;
47
+}
48
+#endif
49
+
50
static void tcg_target_init(TCGContext *s)
51
{
52
#ifdef __linux__
53
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
54
have_lse = hwcap & HWCAP_ATOMICS;
55
have_lse2 = hwcap & HWCAP_USCAT;
56
#endif
57
+#ifdef CONFIG_DARWIN
58
+ have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE");
59
+ have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2");
60
+#endif
61
62
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
63
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
64
--
65
2.34.1
66
67
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 52 +++------------------------------------
9
1 file changed, 4 insertions(+), 48 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
int seg;
17
} HostAddress;
18
19
-#if defined(CONFIG_SOFTMMU)
20
/*
21
* Because i686 has no register parameters and because x86_64 has xchg
22
* to handle addr/data register overlap, we have placed all input arguments
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
25
/* resolve label address */
26
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
27
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
28
+ if (label_ptr[1]) {
29
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
30
}
31
32
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
33
34
/* resolve label address */
35
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
36
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
37
+ if (label_ptr[1]) {
38
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
39
}
40
41
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
42
tcg_out_jmp(s, l->raddr);
43
return true;
44
}
45
-#else
46
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
47
-{
48
- /* resolve label address */
49
- tcg_patch32(l->label_ptr[0], s->code_ptr - l->label_ptr[0] - 4);
50
-
51
- if (TCG_TARGET_REG_BITS == 32) {
52
- int ofs = 0;
53
-
54
- tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
55
- ofs += 4;
56
-
57
- tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
58
- ofs += 4;
59
- if (TARGET_LONG_BITS == 64) {
60
- tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
61
- ofs += 4;
62
- }
63
-
64
- tcg_out_pushi(s, (uintptr_t)l->raddr);
65
- } else {
66
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
67
- l->addrlo_reg);
68
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
69
-
70
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, (uintptr_t)l->raddr);
71
- tcg_out_push(s, TCG_REG_RAX);
72
- }
73
-
74
- /* "Tail call" to the helper, with the return address back inline. */
75
- tcg_out_jmp(s, (const void *)(l->is_ld ? helper_unaligned_ld
76
- : helper_unaligned_st));
77
- return true;
78
-}
79
-
80
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
81
-{
82
- return tcg_out_fail_alignment(s, l);
83
-}
84
-
85
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
86
-{
87
- return tcg_out_fail_alignment(s, l);
88
-}
89
90
+#ifndef CONFIG_SOFTMMU
91
static HostAddress x86_guest_base = {
92
.index = -1
93
};
94
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
95
return 0;
96
}
97
#endif /* setup_guest_base_seg */
98
-#endif /* SOFTMMU */
99
+#endif /* !SOFTMMU */
100
101
/*
102
* For softmmu, perform the TLB load and compare.
103
--
104
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target.c.inc | 35 -----------------------------------
9
1 file changed, 35 deletions(-)
10
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
TCGType index_ext;
17
} HostAddress;
18
19
-#ifdef CONFIG_SOFTMMU
20
static const TCGLdstHelperParam ldst_helper_param = {
21
.ntmp = 1, .tmp = { TCG_REG_TMP }
22
};
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tcg_out_goto(s, lb->raddr);
25
return true;
26
}
27
-#else
28
-static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
29
-{
30
- ptrdiff_t offset = tcg_pcrel_diff(s, target);
31
- tcg_debug_assert(offset == sextract64(offset, 0, 21));
32
- tcg_out_insn(s, 3406, ADR, rd, offset);
33
-}
34
-
35
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
36
-{
37
- if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
38
- return false;
39
- }
40
-
41
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
42
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
43
-
44
- /* "Tail call" to the helper, with the return address back inline. */
45
- tcg_out_adr(s, TCG_REG_LR, l->raddr);
46
- tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
47
- : helper_unaligned_st));
48
- return true;
49
-}
50
-
51
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
52
-{
53
- return tcg_out_fail_alignment(s, l);
54
-}
55
-
56
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
57
-{
58
- return tcg_out_fail_alignment(s, l);
59
-}
60
-#endif /* CONFIG_SOFTMMU */
61
62
/*
63
* For softmmu, perform the TLB load and compare.
64
--
65
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/ppc/tcg-target.c.inc | 44 ----------------------------------------
9
1 file changed, 44 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.c.inc
14
+++ b/tcg/ppc/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
16
[MO_BSWAP | MO_UQ] = STDBRX,
17
};
18
19
-#if defined (CONFIG_SOFTMMU)
20
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
21
{
22
if (arg < 0) {
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tcg_out_b(s, 0, lb->raddr);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
31
- return false;
32
- }
33
-
34
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
35
- TCGReg arg = TCG_REG_R4;
36
-
37
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
38
- if (l->addrlo_reg != arg) {
39
- tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
40
- tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
41
- } else if (l->addrhi_reg != arg + 1) {
42
- tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
43
- tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
44
- } else {
45
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg);
46
- tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1);
47
- tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0);
48
- }
49
- } else {
50
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg);
51
- }
52
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0);
53
-
54
- /* "Tail call" to the helper, with the return address back inline. */
55
- tcg_out_call_int(s, 0, (const void *)(l->is_ld ? helper_unaligned_ld
56
- : helper_unaligned_st));
57
- return true;
58
-}
59
-
60
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
61
-{
62
- return tcg_out_fail_alignment(s, l);
63
-}
64
-
65
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
66
-{
67
- return tcg_out_fail_alignment(s, l);
68
-}
69
-#endif /* SOFTMMU */
70
71
typedef struct {
72
TCGReg base;
73
--
74
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/loongarch64/tcg-target.c.inc | 30 ------------------------------
9
1 file changed, 30 deletions(-)
10
11
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/loongarch64/tcg-target.c.inc
14
+++ b/tcg/loongarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
16
* Load/store helpers for SoftMMU, and qemu_ld/st implementations
17
*/
18
19
-#if defined(CONFIG_SOFTMMU)
20
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
21
{
22
tcg_out_opc_b(s, 0);
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
25
return tcg_out_goto(s, l->raddr);
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- /* resolve label address */
31
- if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
32
- return false;
33
- }
34
-
35
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
37
-
38
- /* tail call, with the return address back inline. */
39
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr);
40
- tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld
41
- : helper_unaligned_st), true);
42
- return true;
43
-}
44
-
45
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
-{
47
- return tcg_out_fail_alignment(s, l);
48
-}
49
-
50
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
-{
52
- return tcg_out_fail_alignment(s, l);
53
-}
54
-
55
-#endif /* CONFIG_SOFTMMU */
56
57
typedef struct {
58
TCGReg base;
59
--
60
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/riscv/tcg-target.c.inc | 29 -----------------------------
9
1 file changed, 29 deletions(-)
10
11
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/riscv/tcg-target.c.inc
14
+++ b/tcg/riscv/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
16
* Load/store and TLB
17
*/
18
19
-#if defined(CONFIG_SOFTMMU)
20
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
21
{
22
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
tcg_out_goto(s, l->raddr);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- /* resolve label address */
31
- if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
32
- return false;
33
- }
34
-
35
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
37
-
38
- /* tail call, with the return address back inline. */
39
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr);
40
- tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld
41
- : helper_unaligned_st), true);
42
- return true;
43
-}
44
-
45
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
-{
47
- return tcg_out_fail_alignment(s, l);
48
-}
49
-
50
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
-{
52
- return tcg_out_fail_alignment(s, l);
53
-}
54
-#endif /* CONFIG_SOFTMMU */
55
56
/*
57
* For softmmu, perform the TLB load and compare.
58
--
59
2.34.1
New patch
1
Always reserve r3 for tlb softmmu lookup. Fix a bug in user-only
2
ALL_QLDST_REGS, in that r14 is clobbered by the BLNE that leads
3
to the misaligned trap. Remove r0+r1 from user-only ALL_QLDST_REGS;
4
I believe these had been reserved for bswap, which we no longer
5
perform during qemu_st.
1
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
tcg/arm/tcg-target-con-set.h | 16 ++++++++--------
11
tcg/arm/tcg-target-con-str.h | 5 ++---
12
tcg/arm/tcg-target.c.inc | 23 ++++++++---------------
13
3 files changed, 18 insertions(+), 26 deletions(-)
14
15
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/arm/tcg-target-con-set.h
18
+++ b/tcg/arm/tcg-target-con-set.h
19
@@ -XXX,XX +XXX,XX @@
20
C_O0_I1(r)
21
C_O0_I2(r, r)
22
C_O0_I2(r, rIN)
23
-C_O0_I2(s, s)
24
+C_O0_I2(q, q)
25
C_O0_I2(w, r)
26
-C_O0_I3(s, s, s)
27
-C_O0_I3(S, p, s)
28
+C_O0_I3(q, q, q)
29
+C_O0_I3(Q, p, q)
30
C_O0_I4(r, r, rI, rI)
31
-C_O0_I4(S, p, s, s)
32
-C_O1_I1(r, l)
33
+C_O0_I4(Q, p, q, q)
34
+C_O1_I1(r, q)
35
C_O1_I1(r, r)
36
C_O1_I1(w, r)
37
C_O1_I1(w, w)
38
C_O1_I1(w, wr)
39
C_O1_I2(r, 0, rZ)
40
-C_O1_I2(r, l, l)
41
+C_O1_I2(r, q, q)
42
C_O1_I2(r, r, r)
43
C_O1_I2(r, r, rI)
44
C_O1_I2(r, r, rIK)
45
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wZ)
46
C_O1_I3(w, w, w, w)
47
C_O1_I4(r, r, r, rI, rI)
48
C_O1_I4(r, r, rIN, rIK, 0)
49
-C_O2_I1(e, p, l)
50
-C_O2_I2(e, p, l, l)
51
+C_O2_I1(e, p, q)
52
+C_O2_I2(e, p, q, q)
53
C_O2_I2(r, r, r, r)
54
C_O2_I4(r, r, r, r, rIN, rIK)
55
C_O2_I4(r, r, rI, rI, rIN, rIK)
56
diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
57
index XXXXXXX..XXXXXXX 100644
58
--- a/tcg/arm/tcg-target-con-str.h
59
+++ b/tcg/arm/tcg-target-con-str.h
60
@@ -XXX,XX +XXX,XX @@
61
*/
62
REGS('e', ALL_GENERAL_REGS & 0x5555) /* even regs */
63
REGS('r', ALL_GENERAL_REGS)
64
-REGS('l', ALL_QLOAD_REGS)
65
-REGS('s', ALL_QSTORE_REGS)
66
-REGS('S', ALL_QSTORE_REGS & 0x5555) /* even qstore */
67
+REGS('q', ALL_QLDST_REGS)
68
+REGS('Q', ALL_QLDST_REGS & 0x5555) /* even qldst */
69
REGS('w', ALL_VECTOR_REGS)
70
71
/*
72
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
73
index XXXXXXX..XXXXXXX 100644
74
--- a/tcg/arm/tcg-target.c.inc
75
+++ b/tcg/arm/tcg-target.c.inc
76
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
77
#define ALL_VECTOR_REGS 0xffff0000u
78
79
/*
80
- * r0-r2 will be overwritten when reading the tlb entry (softmmu only)
81
- * and r0-r1 doing the byte swapping, so don't use these.
82
- * r3 is removed for softmmu to avoid clashes with helper arguments.
83
+ * r0-r3 will be overwritten when reading the tlb entry (softmmu only);
84
+ * r14 will be overwritten by the BLNE branching to the slow path.
85
*/
86
#ifdef CONFIG_SOFTMMU
87
-#define ALL_QLOAD_REGS \
88
+#define ALL_QLDST_REGS \
89
(ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
90
(1 << TCG_REG_R2) | (1 << TCG_REG_R3) | \
91
(1 << TCG_REG_R14)))
92
-#define ALL_QSTORE_REGS \
93
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
94
- (1 << TCG_REG_R2) | (1 << TCG_REG_R14) | \
95
- ((TARGET_LONG_BITS == 64) << TCG_REG_R3)))
96
#else
97
-#define ALL_QLOAD_REGS ALL_GENERAL_REGS
98
-#define ALL_QSTORE_REGS \
99
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1)))
100
+#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R14))
101
#endif
102
103
/*
104
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
105
return C_O1_I4(r, r, r, rI, rI);
106
107
case INDEX_op_qemu_ld_i32:
108
- return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
109
+ return TARGET_LONG_BITS == 32 ? C_O1_I1(r, q) : C_O1_I2(r, q, q);
110
case INDEX_op_qemu_ld_i64:
111
- return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, l) : C_O2_I2(e, p, l, l);
112
+ return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, q) : C_O2_I2(e, p, q, q);
113
case INDEX_op_qemu_st_i32:
114
- return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
115
+ return TARGET_LONG_BITS == 32 ? C_O0_I2(q, q) : C_O0_I3(q, q, q);
116
case INDEX_op_qemu_st_i64:
117
- return TARGET_LONG_BITS == 32 ? C_O0_I3(S, p, s) : C_O0_I4(S, p, s, s);
118
+ return TARGET_LONG_BITS == 32 ? C_O0_I3(Q, p, q) : C_O0_I4(Q, p, q, q);
119
120
case INDEX_op_st_vec:
121
return C_O0_I2(w, r);
122
--
123
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/arm/tcg-target.c.inc | 45 ----------------------------------------
9
1 file changed, 45 deletions(-)
10
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/arm/tcg-target.c.inc
14
+++ b/tcg/arm/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
bool index_scratch;
17
} HostAddress;
18
19
-#ifdef CONFIG_SOFTMMU
20
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
21
{
22
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
31
- return false;
32
- }
33
-
34
- if (TARGET_LONG_BITS == 64) {
35
- /* 64-bit target address is aligned into R2:R3. */
36
- TCGMovExtend ext[2] = {
37
- { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
38
- .src = l->addrlo_reg,
39
- .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
40
- { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
41
- .src = l->addrhi_reg,
42
- .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
43
- };
44
- tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
45
- } else {
46
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
47
- }
48
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0);
49
-
50
- /*
51
- * Tail call to the helper, with the return address back inline,
52
- * just for the clarity of the debugging traceback -- the helper
53
- * cannot return. We have used BLNE to arrive here, so LR is
54
- * already set.
55
- */
56
- tcg_out_goto(s, COND_AL, (const void *)
57
- (l->is_ld ? helper_unaligned_ld : helper_unaligned_st));
58
- return true;
59
-}
60
-
61
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
62
-{
63
- return tcg_out_fail_alignment(s, l);
64
-}
65
-
66
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
67
-{
68
- return tcg_out_fail_alignment(s, l);
69
-}
70
-#endif /* SOFTMMU */
71
72
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
73
TCGReg addrlo, TCGReg addrhi,
74
--
75
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/mips/tcg-target.c.inc | 57 ++-------------------------------------
9
1 file changed, 2 insertions(+), 55 deletions(-)
10
11
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/mips/tcg-target.c.inc
14
+++ b/tcg/mips/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
16
tcg_out_nop(s);
17
}
18
19
-#if defined(CONFIG_SOFTMMU)
20
/* We have four temps, we might as well expose three of them. */
21
static const TCGLdstHelperParam ldst_helper_param = {
22
.ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
25
/* resolve label address */
26
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
27
- || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
28
- && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
29
+ || (l->label_ptr[1] && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
30
return false;
31
}
32
33
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
34
35
/* resolve label address */
36
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
37
- || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
38
- && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
39
+ || (l->label_ptr[1] && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
40
return false;
41
}
42
43
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
44
return true;
45
}
46
47
-#else
48
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
49
-{
50
- void *target;
51
-
52
- if (!reloc_pc16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
53
- return false;
54
- }
55
-
56
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
57
- /* A0 is env, A1 is skipped, A2:A3 is the uint64_t address. */
58
- TCGReg a2 = MIPS_BE ? l->addrhi_reg : l->addrlo_reg;
59
- TCGReg a3 = MIPS_BE ? l->addrlo_reg : l->addrhi_reg;
60
-
61
- if (a3 != TCG_REG_A2) {
62
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2);
63
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3);
64
- } else if (a2 != TCG_REG_A3) {
65
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3);
66
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2);
67
- } else {
68
- tcg_out_mov(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A2);
69
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, TCG_REG_A3);
70
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, TCG_TMP0);
71
- }
72
- } else {
73
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
74
- }
75
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
76
-
77
- /*
78
- * Tail call to the helper, with the return address back inline.
79
- * We have arrived here via BNEL, so $31 is already set.
80
- */
81
- target = (l->is_ld ? helper_unaligned_ld : helper_unaligned_st);
82
- tcg_out_call_int(s, target, true);
83
- return true;
84
-}
85
-
86
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
87
-{
88
- return tcg_out_fail_alignment(s, l);
89
-}
90
-
91
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
92
-{
93
- return tcg_out_fail_alignment(s, l);
94
-}
95
-#endif /* SOFTMMU */
96
-
97
typedef struct {
98
TCGReg base;
99
MemOp align;
100
--
101
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/s390x/tcg-target.c.inc | 29 -----------------------------
9
1 file changed, 29 deletions(-)
10
11
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/s390x/tcg-target.c.inc
14
+++ b/tcg/s390x/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
16
}
17
}
18
19
-#if defined(CONFIG_SOFTMMU)
20
static const TCGLdstHelperParam ldst_helper_param = {
21
.ntmp = 1, .tmp = { TCG_TMP0 }
22
};
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
31
- (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
32
- return false;
33
- }
34
-
35
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
37
-
38
- /* "Tail call" to the helper, with the return address back inline. */
39
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
40
- tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
41
- : helper_unaligned_st));
42
- return true;
43
-}
44
-
45
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
-{
47
- return tcg_out_fail_alignment(s, l);
48
-}
49
-
50
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
-{
52
- return tcg_out_fail_alignment(s, l);
53
-}
54
-#endif /* CONFIG_SOFTMMU */
55
56
/*
57
* For softmmu, perform the TLB load and compare.
58
--
59
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/sparc64/tcg-target.c.inc | 15 +++++++--------
5
1 file changed, 7 insertions(+), 8 deletions(-)
1
6
7
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/sparc64/tcg-target.c.inc
10
+++ b/tcg/sparc64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
12
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
13
#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
14
15
-/* Define some temporary registers. T2 is used for constant generation. */
16
+/* Define some temporary registers. T3 is used for constant generation. */
17
#define TCG_REG_T1 TCG_REG_G1
18
-#define TCG_REG_T2 TCG_REG_O7
19
+#define TCG_REG_T2 TCG_REG_G2
20
+#define TCG_REG_T3 TCG_REG_O7
21
22
#ifndef CONFIG_SOFTMMU
23
# define TCG_GUEST_BASE_REG TCG_REG_I5
24
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
25
TCG_REG_I4,
26
TCG_REG_I5,
27
28
- TCG_REG_G2,
29
TCG_REG_G3,
30
TCG_REG_G4,
31
TCG_REG_G5,
32
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
33
static void tcg_out_movi(TCGContext *s, TCGType type,
34
TCGReg ret, tcg_target_long arg)
35
{
36
- tcg_debug_assert(ret != TCG_REG_T2);
37
- tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2);
38
+ tcg_debug_assert(ret != TCG_REG_T3);
39
+ tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T3);
40
}
41
42
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs)
43
@@ -XXX,XX +XXX,XX @@ static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest,
44
{
45
uintptr_t desti = (uintptr_t)dest;
46
47
- /* Be careful not to clobber %o7 for a tail call. */
48
tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1,
49
- desti & ~0xfff, in_prologue,
50
- tail_call ? TCG_REG_G2 : TCG_REG_O7);
51
+ desti & ~0xfff, in_prologue, TCG_REG_T2);
52
tcg_out_arithi(s, tail_call ? TCG_REG_G0 : TCG_REG_O7,
53
TCG_REG_T1, desti & 0xfff, JMPL);
54
}
55
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
56
tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
57
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
58
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
59
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_T3); /* for internal use */
60
}
61
62
#define ELF_HOST_MACHINE EM_SPARCV9
63
--
64
2.34.1
New patch
1
Emphasize that the constant is signed.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/sparc64/tcg-target.c.inc | 21 +++++++++++----------
7
1 file changed, 11 insertions(+), 10 deletions(-)
8
9
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/sparc64/tcg-target.c.inc
12
+++ b/tcg/sparc64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
14
tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
15
}
16
17
-static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
18
+/* A 13-bit constant sign-extended to 64 bits. */
19
+static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
20
{
21
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
22
}
23
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
24
{
25
if (check_fit_i32(arg, 13)) {
26
/* A 13-bit constant sign-extended to 64-bits. */
27
- tcg_out_movi_imm13(s, ret, arg);
28
+ tcg_out_movi_s13(s, ret, arg);
29
} else {
30
/* A 32-bit constant zero-extended to 64 bits. */
31
tcg_out_sethi(s, ret, arg);
32
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
33
34
/* A 13-bit constant sign-extended to 64-bits. */
35
if (check_fit_tl(arg, 13)) {
36
- tcg_out_movi_imm13(s, ret, arg);
37
+ tcg_out_movi_s13(s, ret, arg);
38
return;
39
}
40
41
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
42
43
default:
44
tcg_out_cmp(s, c1, c2, c2const);
45
- tcg_out_movi_imm13(s, ret, 0);
46
+ tcg_out_movi_s13(s, ret, 0);
47
tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
48
return;
49
}
50
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
51
/* For 64-bit signed comparisons vs zero, we can avoid the compare
52
if the input does not overlap the output. */
53
if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
54
- tcg_out_movi_imm13(s, ret, 0);
55
+ tcg_out_movi_s13(s, ret, 0);
56
tcg_out_movr(s, cond, ret, c1, 1, 1);
57
} else {
58
tcg_out_cmp(s, c1, c2, c2const);
59
- tcg_out_movi_imm13(s, ret, 0);
60
+ tcg_out_movi_s13(s, ret, 0);
61
tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
62
}
63
}
64
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
65
if (use_vis3_instructions && !is_sub) {
66
/* Note that ADDXC doesn't accept immediates. */
67
if (bhconst && bh != 0) {
68
- tcg_out_movi_imm13(s, TCG_REG_T2, bh);
69
+ tcg_out_movi_s13(s, TCG_REG_T2, bh);
70
bh = TCG_REG_T2;
71
}
72
tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
73
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
74
* so the adjustment fits 12 bits.
75
*/
76
if (bhconst) {
77
- tcg_out_movi_imm13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1));
78
+ tcg_out_movi_s13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1));
79
} else {
80
tcg_out_arithi(s, TCG_REG_T2, bh, 1,
81
is_sub ? ARITH_SUB : ARITH_ADD);
82
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
83
tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
84
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
85
/* delay slot */
86
- tcg_out_movi_imm13(s, TCG_REG_O0, 0);
87
+ tcg_out_movi_s13(s, TCG_REG_O0, 0);
88
89
build_trampolines(s);
90
}
91
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
92
{
93
if (check_fit_ptr(a0, 13)) {
94
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
95
- tcg_out_movi_imm13(s, TCG_REG_O0, a0);
96
+ tcg_out_movi_s13(s, TCG_REG_O0, a0);
97
return;
98
} else {
99
intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0);
100
--
101
2.34.1
New patch
1
Shuffle the order in tcg_out_movi_int to check s13 first, and
2
drop this check from tcg_out_movi_imm32. This might make the
3
sequence for in_prologue larger, but not worth worrying about.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/sparc64/tcg-target.c.inc | 25 ++++++++++---------------
9
1 file changed, 10 insertions(+), 15 deletions(-)
10
11
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/sparc64/tcg-target.c.inc
14
+++ b/tcg/sparc64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
16
17
static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
18
{
19
- if (check_fit_i32(arg, 13)) {
20
- /* A 13-bit constant sign-extended to 64-bits. */
21
- tcg_out_movi_s13(s, ret, arg);
22
- } else {
23
- /* A 32-bit constant zero-extended to 64 bits. */
24
- tcg_out_sethi(s, ret, arg);
25
- if (arg & 0x3ff) {
26
- tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
27
- }
28
+ /* A 32-bit constant zero-extended to 64 bits. */
29
+ tcg_out_sethi(s, ret, arg);
30
+ if (arg & 0x3ff) {
31
+ tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
32
}
33
}
34
35
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
36
tcg_target_long hi, lo = (int32_t)arg;
37
tcg_target_long test, lsb;
38
39
- /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
40
- if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
41
- tcg_out_movi_imm32(s, ret, arg);
42
- return;
43
- }
44
-
45
/* A 13-bit constant sign-extended to 64-bits. */
46
if (check_fit_tl(arg, 13)) {
47
tcg_out_movi_s13(s, ret, arg);
48
return;
49
}
50
51
+ /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
52
+ if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
53
+ tcg_out_movi_imm32(s, ret, arg);
54
+ return;
55
+ }
56
+
57
/* A 13-bit constant relative to the TB. */
58
if (!in_prologue) {
59
test = tcg_tbrel_diff(s, (void *)arg);
60
--
61
2.34.1
New patch
1
Emphasize that the constant is unsigned.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/sparc64/tcg-target.c.inc | 12 ++++++------
7
1 file changed, 6 insertions(+), 6 deletions(-)
8
9
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/sparc64/tcg-target.c.inc
12
+++ b/tcg/sparc64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
14
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
15
}
16
17
-static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
18
+/* A 32-bit constant zero-extended to 64 bits. */
19
+static void tcg_out_movi_u32(TCGContext *s, TCGReg ret, uint32_t arg)
20
{
21
- /* A 32-bit constant zero-extended to 64 bits. */
22
tcg_out_sethi(s, ret, arg);
23
if (arg & 0x3ff) {
24
tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
26
27
/* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
28
if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
29
- tcg_out_movi_imm32(s, ret, arg);
30
+ tcg_out_movi_u32(s, ret, arg);
31
return;
32
}
33
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
35
/* A 64-bit constant decomposed into 2 32-bit pieces. */
36
if (check_fit_i32(lo, 13)) {
37
hi = (arg - lo) >> 32;
38
- tcg_out_movi_imm32(s, ret, hi);
39
+ tcg_out_movi_u32(s, ret, hi);
40
tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
41
tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
42
} else {
43
hi = arg >> 32;
44
- tcg_out_movi_imm32(s, ret, hi);
45
- tcg_out_movi_imm32(s, scratch, lo);
46
+ tcg_out_movi_u32(s, ret, hi);
47
+ tcg_out_movi_u32(s, scratch, lo);
48
tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
49
tcg_out_arith(s, ret, ret, scratch, ARITH_OR);
50
}
51
--
52
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/sparc64/tcg-target.c.inc | 10 ++++++++--
5
1 file changed, 8 insertions(+), 2 deletions(-)
1
6
7
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/sparc64/tcg-target.c.inc
10
+++ b/tcg/sparc64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
12
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
13
}
14
15
+/* A 32-bit constant sign-extended to 64 bits. */
16
+static void tcg_out_movi_s32(TCGContext *s, TCGReg ret, int32_t arg)
17
+{
18
+ tcg_out_sethi(s, ret, ~arg);
19
+ tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
20
+}
21
+
22
/* A 32-bit constant zero-extended to 64 bits. */
23
static void tcg_out_movi_u32(TCGContext *s, TCGReg ret, uint32_t arg)
24
{
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
26
27
/* A 32-bit constant sign-extended to 64-bits. */
28
if (arg == lo) {
29
- tcg_out_sethi(s, ret, ~arg);
30
- tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
31
+ tcg_out_movi_s32(s, ret, arg);
32
return;
33
}
34
35
--
36
2.34.1
New patch
1
Drop the target-specific trampolines for the standard slow path.
2
This lets us use tcg_out_helper_{ld,st}_args, and handles the new
3
atomicity bits within MemOp.
1
4
5
At the same time, use the full load/store helpers for user-only mode.
6
Drop inline unaligned access support for user-only mode, as it does
7
not handle atomicity.
8
9
Use TCG_REG_T[1-3] in the tlb lookup, instead of TCG_REG_O[0-2].
10
This allows the constraints to be simplified.
11
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
14
---
15
tcg/sparc64/tcg-target-con-set.h | 2 -
16
tcg/sparc64/tcg-target-con-str.h | 1 -
17
tcg/sparc64/tcg-target.h | 1 +
18
tcg/sparc64/tcg-target.c.inc | 610 +++++++++----------------------
19
4 files changed, 182 insertions(+), 432 deletions(-)
20
21
diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h
22
index XXXXXXX..XXXXXXX 100644
23
--- a/tcg/sparc64/tcg-target-con-set.h
24
+++ b/tcg/sparc64/tcg-target-con-set.h
25
@@ -XXX,XX +XXX,XX @@
26
C_O0_I1(r)
27
C_O0_I2(rZ, r)
28
C_O0_I2(rZ, rJ)
29
-C_O0_I2(sZ, s)
30
-C_O1_I1(r, s)
31
C_O1_I1(r, r)
32
C_O1_I2(r, r, r)
33
C_O1_I2(r, rZ, rJ)
34
diff --git a/tcg/sparc64/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/tcg/sparc64/tcg-target-con-str.h
37
+++ b/tcg/sparc64/tcg-target-con-str.h
38
@@ -XXX,XX +XXX,XX @@
39
* REGS(letter, register_mask)
40
*/
41
REGS('r', ALL_GENERAL_REGS)
42
-REGS('s', ALL_QLDST_REGS)
43
44
/*
45
* Define constraint letters for constants:
46
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
47
index XXXXXXX..XXXXXXX 100644
48
--- a/tcg/sparc64/tcg-target.h
49
+++ b/tcg/sparc64/tcg-target.h
50
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
51
52
#define TCG_TARGET_DEFAULT_MO (0)
53
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
54
+#define TCG_TARGET_NEED_LDST_LABELS
55
#define TCG_TARGET_NEED_POOL_LABELS
56
57
#endif
58
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
59
index XXXXXXX..XXXXXXX 100644
60
--- a/tcg/sparc64/tcg-target.c.inc
61
+++ b/tcg/sparc64/tcg-target.c.inc
62
@@ -XXX,XX +XXX,XX @@
63
#error "unsupported code generation mode"
64
#endif
65
66
+#include "../tcg-ldst.c.inc"
67
#include "../tcg-pool.c.inc"
68
69
#ifdef CONFIG_DEBUG_TCG
70
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
71
#define TCG_CT_CONST_S13 0x200
72
#define TCG_CT_CONST_ZERO 0x400
73
74
-/*
75
- * For softmmu, we need to avoid conflicts with the first 3
76
- * argument registers to perform the tlb lookup, and to call
77
- * the helper function.
78
- */
79
-#ifdef CONFIG_SOFTMMU
80
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_O0, 3)
81
-#else
82
-#define SOFTMMU_RESERVE_REGS 0
83
-#endif
84
-#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
85
-#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
86
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
87
88
/* Define some temporary registers. T3 is used for constant generation. */
89
#define TCG_REG_T1 TCG_REG_G1
90
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
91
tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL));
92
}
93
94
-#ifdef CONFIG_SOFTMMU
95
-static const tcg_insn_unit *qemu_ld_trampoline[MO_SSIZE + 1];
96
-static const tcg_insn_unit *qemu_st_trampoline[MO_SIZE + 1];
97
-
98
-static void build_trampolines(TCGContext *s)
99
-{
100
- int i;
101
-
102
- for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
103
- if (qemu_ld_helpers[i] == NULL) {
104
- continue;
105
- }
106
-
107
- /* May as well align the trampoline. */
108
- while ((uintptr_t)s->code_ptr & 15) {
109
- tcg_out_nop(s);
110
- }
111
- qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
112
-
113
- /* Set the retaddr operand. */
114
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7);
115
- /* Tail call. */
116
- tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true);
117
- /* delay slot -- set the env argument */
118
- tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
119
- }
120
-
121
- for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) {
122
- if (qemu_st_helpers[i] == NULL) {
123
- continue;
124
- }
125
-
126
- /* May as well align the trampoline. */
127
- while ((uintptr_t)s->code_ptr & 15) {
128
- tcg_out_nop(s);
129
- }
130
- qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
131
-
132
- /* Set the retaddr operand. */
133
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7);
134
-
135
- /* Tail call. */
136
- tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true);
137
- /* delay slot -- set the env argument */
138
- tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
139
- }
140
-}
141
-#else
142
-static const tcg_insn_unit *qemu_unalign_ld_trampoline;
143
-static const tcg_insn_unit *qemu_unalign_st_trampoline;
144
-
145
-static void build_trampolines(TCGContext *s)
146
-{
147
- for (int ld = 0; ld < 2; ++ld) {
148
- void *helper;
149
-
150
- while ((uintptr_t)s->code_ptr & 15) {
151
- tcg_out_nop(s);
152
- }
153
-
154
- if (ld) {
155
- helper = helper_unaligned_ld;
156
- qemu_unalign_ld_trampoline = tcg_splitwx_to_rx(s->code_ptr);
157
- } else {
158
- helper = helper_unaligned_st;
159
- qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr);
160
- }
161
-
162
- /* Tail call. */
163
- tcg_out_jmpl_const(s, helper, true, true);
164
- /* delay slot -- set the env argument */
165
- tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
166
- }
167
-}
168
-#endif
169
-
170
/* Generate global QEMU prologue and epilogue code */
171
static void tcg_target_qemu_prologue(TCGContext *s)
172
{
173
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
174
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
175
/* delay slot */
176
tcg_out_movi_s13(s, TCG_REG_O0, 0);
177
-
178
- build_trampolines(s);
179
}
180
181
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
182
@@ -XXX,XX +XXX,XX @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
183
}
184
}
185
186
-#if defined(CONFIG_SOFTMMU)
187
+static const TCGLdstHelperParam ldst_helper_param = {
188
+ .ntmp = 1, .tmp = { TCG_REG_T1 }
189
+};
190
191
-/* We expect to use a 13-bit negative offset from ENV. */
192
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
193
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
194
-
195
-/* Perform the TLB load and compare.
196
-
197
- Inputs:
198
- ADDRLO and ADDRHI contain the possible two parts of the address.
199
-
200
- MEM_INDEX and S_BITS are the memory context and log2 size of the load.
201
-
202
- WHICH is the offset into the CPUTLBEntry structure of the slot to read.
203
- This should be offsetof addr_read or addr_write.
204
-
205
- The result of the TLB comparison is in %[ix]cc. The sanitized address
206
- is in the returned register, maybe %o0. The TLB addend is in %o1. */
207
-
208
-static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
209
- MemOp opc, int which)
210
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
211
{
212
+ MemOp opc = get_memop(lb->oi);
213
+ MemOp sgn;
214
+
215
+ if (!patch_reloc(lb->label_ptr[0], R_SPARC_WDISP19,
216
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 0)) {
217
+ return false;
218
+ }
219
+
220
+ /* Use inline tcg_out_ext32s; otherwise let the helper sign-extend. */
221
+ sgn = (opc & MO_SIZE) < MO_32 ? MO_SIGN : 0;
222
+
223
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
224
+ tcg_out_call(s, qemu_ld_helpers[opc & (MO_SIZE | sgn)], NULL);
225
+ tcg_out_ld_helper_ret(s, lb, sgn, &ldst_helper_param);
226
+
227
+ tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
228
+ return patch_reloc(s->code_ptr - 1, R_SPARC_WDISP19,
229
+ (intptr_t)lb->raddr, 0);
230
+}
231
+
232
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
233
+{
234
+ MemOp opc = get_memop(lb->oi);
235
+
236
+ if (!patch_reloc(lb->label_ptr[0], R_SPARC_WDISP19,
237
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 0)) {
238
+ return false;
239
+ }
240
+
241
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
242
+ tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE], NULL);
243
+
244
+ tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
245
+ return patch_reloc(s->code_ptr - 1, R_SPARC_WDISP19,
246
+ (intptr_t)lb->raddr, 0);
247
+}
248
+
249
+typedef struct {
250
+ TCGReg base;
251
+ TCGReg index;
252
+} HostAddress;
253
+
254
+/*
255
+ * For softmmu, perform the TLB load and compare.
256
+ * For useronly, perform any required alignment tests.
257
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
258
+ * is required and fill in @h with the host address for the fast path.
259
+ */
260
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
261
+ TCGReg addr_reg, MemOpIdx oi,
262
+ bool is_ld)
263
+{
264
+ TCGLabelQemuLdst *ldst = NULL;
265
+ MemOp opc = get_memop(oi);
266
+ unsigned a_bits = get_alignment_bits(opc);
267
+ unsigned s_bits = opc & MO_SIZE;
268
+ unsigned a_mask;
269
+
270
+ /* We don't support unaligned accesses. */
271
+ a_bits = MAX(a_bits, s_bits);
272
+ a_mask = (1u << a_bits) - 1;
273
+
274
+#ifdef CONFIG_SOFTMMU
275
+ int mem_index = get_mmuidx(oi);
276
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
277
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
278
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
279
- const TCGReg r0 = TCG_REG_O0;
280
- const TCGReg r1 = TCG_REG_O1;
281
- const TCGReg r2 = TCG_REG_O2;
282
- unsigned s_bits = opc & MO_SIZE;
283
- unsigned a_bits = get_alignment_bits(opc);
284
- tcg_target_long compare_mask;
285
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
286
+ : offsetof(CPUTLBEntry, addr_write);
287
+ int add_off = offsetof(CPUTLBEntry, addend);
288
+ int compare_mask;
289
+ int cc;
290
291
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
292
- tcg_out_ld(s, TCG_TYPE_PTR, r0, TCG_AREG0, mask_off);
293
- tcg_out_ld(s, TCG_TYPE_PTR, r1, TCG_AREG0, table_off);
294
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
295
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
296
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T2, TCG_AREG0, mask_off);
297
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T3, TCG_AREG0, table_off);
298
299
/* Extract the page index, shifted into place for tlb index. */
300
- tcg_out_arithi(s, r2, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
301
- SHIFT_SRL);
302
- tcg_out_arith(s, r2, r2, r0, ARITH_AND);
303
+ tcg_out_arithi(s, TCG_REG_T1, addr_reg,
304
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
305
+ tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T2, ARITH_AND);
306
307
/* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */
308
- tcg_out_arith(s, r2, r2, r1, ARITH_ADD);
309
+ tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T3, ARITH_ADD);
310
311
- /* Load the tlb comparator and the addend. */
312
- tcg_out_ld(s, TCG_TYPE_TL, r0, r2, which);
313
- tcg_out_ld(s, TCG_TYPE_PTR, r1, r2, offsetof(CPUTLBEntry, addend));
314
+ /* Load the tlb comparator and the addend. */
315
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_T2, TCG_REG_T1, cmp_off);
316
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T1, TCG_REG_T1, add_off);
317
+ h->base = TCG_REG_T1;
318
319
- /* Mask out the page offset, except for the required alignment.
320
- We don't support unaligned accesses. */
321
- if (a_bits < s_bits) {
322
- a_bits = s_bits;
323
- }
324
- compare_mask = (tcg_target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
325
+ /* Mask out the page offset, except for the required alignment. */
326
+ compare_mask = TARGET_PAGE_MASK | a_mask;
327
if (check_fit_tl(compare_mask, 13)) {
328
- tcg_out_arithi(s, r2, addr, compare_mask, ARITH_AND);
329
+ tcg_out_arithi(s, TCG_REG_T3, addr_reg, compare_mask, ARITH_AND);
330
} else {
331
- tcg_out_movi(s, TCG_TYPE_TL, r2, compare_mask);
332
- tcg_out_arith(s, r2, addr, r2, ARITH_AND);
333
+ tcg_out_movi_s32(s, TCG_REG_T3, compare_mask);
334
+ tcg_out_arith(s, TCG_REG_T3, addr_reg, TCG_REG_T3, ARITH_AND);
335
}
336
- tcg_out_cmp(s, r0, r2, 0);
337
+ tcg_out_cmp(s, TCG_REG_T2, TCG_REG_T3, 0);
338
339
- /* If the guest address must be zero-extended, do so now. */
340
+ ldst = new_ldst_label(s);
341
+ ldst->is_ld = is_ld;
342
+ ldst->oi = oi;
343
+ ldst->addrlo_reg = addr_reg;
344
+ ldst->label_ptr[0] = s->code_ptr;
345
+
346
+ /* bne,pn %[xi]cc, label0 */
347
+ cc = TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC;
348
+ tcg_out_bpcc0(s, COND_NE, BPCC_PN | cc, 0);
349
+#else
350
+ if (a_bits != s_bits) {
351
+ /*
352
+ * Test for at least natural alignment, and defer
353
+ * everything else to the helper functions.
354
+ */
355
+ tcg_debug_assert(check_fit_tl(a_mask, 13));
356
+ tcg_out_arithi(s, TCG_REG_G0, addr_reg, a_mask, ARITH_ANDCC);
357
+
358
+ ldst = new_ldst_label(s);
359
+ ldst->is_ld = is_ld;
360
+ ldst->oi = oi;
361
+ ldst->addrlo_reg = addr_reg;
362
+ ldst->label_ptr[0] = s->code_ptr;
363
+
364
+ /* bne,pn %icc, label0 */
365
+ tcg_out_bpcc0(s, COND_NE, BPCC_PN | BPCC_ICC, 0);
366
+ }
367
+ h->base = guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0;
368
+#endif
369
+
370
+ /* If the guest address must be zero-extended, do in the delay slot. */
371
if (TARGET_LONG_BITS == 32) {
372
- tcg_out_ext32u(s, r0, addr);
373
- return r0;
374
+ tcg_out_ext32u(s, TCG_REG_T2, addr_reg);
375
+ h->index = TCG_REG_T2;
376
+ } else {
377
+ if (ldst) {
378
+ tcg_out_nop(s);
379
+ }
380
+ h->index = addr_reg;
381
}
382
- return addr;
383
+ return ldst;
384
}
385
-#endif /* CONFIG_SOFTMMU */
386
-
387
-static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
388
- [MO_UB] = LDUB,
389
- [MO_SB] = LDSB,
390
- [MO_UB | MO_LE] = LDUB,
391
- [MO_SB | MO_LE] = LDSB,
392
-
393
- [MO_BEUW] = LDUH,
394
- [MO_BESW] = LDSH,
395
- [MO_BEUL] = LDUW,
396
- [MO_BESL] = LDSW,
397
- [MO_BEUQ] = LDX,
398
- [MO_BESQ] = LDX,
399
-
400
- [MO_LEUW] = LDUH_LE,
401
- [MO_LESW] = LDSH_LE,
402
- [MO_LEUL] = LDUW_LE,
403
- [MO_LESL] = LDSW_LE,
404
- [MO_LEUQ] = LDX_LE,
405
- [MO_LESQ] = LDX_LE,
406
-};
407
-
408
-static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
409
- [MO_UB] = STB,
410
-
411
- [MO_BEUW] = STH,
412
- [MO_BEUL] = STW,
413
- [MO_BEUQ] = STX,
414
-
415
- [MO_LEUW] = STH_LE,
416
- [MO_LEUL] = STW_LE,
417
- [MO_LEUQ] = STX_LE,
418
-};
419
420
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
421
MemOpIdx oi, TCGType data_type)
422
{
423
- MemOp memop = get_memop(oi);
424
- tcg_insn_unit *label_ptr;
425
+ static const int ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
426
+ [MO_UB] = LDUB,
427
+ [MO_SB] = LDSB,
428
+ [MO_UB | MO_LE] = LDUB,
429
+ [MO_SB | MO_LE] = LDSB,
430
431
-#ifdef CONFIG_SOFTMMU
432
- unsigned memi = get_mmuidx(oi);
433
- TCGReg addrz;
434
- const tcg_insn_unit *func;
435
+ [MO_BEUW] = LDUH,
436
+ [MO_BESW] = LDSH,
437
+ [MO_BEUL] = LDUW,
438
+ [MO_BESL] = LDSW,
439
+ [MO_BEUQ] = LDX,
440
+ [MO_BESQ] = LDX,
441
442
- addrz = tcg_out_tlb_load(s, addr, memi, memop,
443
- offsetof(CPUTLBEntry, addr_read));
444
+ [MO_LEUW] = LDUH_LE,
445
+ [MO_LESW] = LDSH_LE,
446
+ [MO_LEUL] = LDUW_LE,
447
+ [MO_LESL] = LDSW_LE,
448
+ [MO_LEUQ] = LDX_LE,
449
+ [MO_LESQ] = LDX_LE,
450
+ };
451
452
- /* The fast path is exactly one insn. Thus we can perform the
453
- entire TLB Hit in the (annulled) delay slot of the branch
454
- over the TLB Miss case. */
455
+ TCGLabelQemuLdst *ldst;
456
+ HostAddress h;
457
458
- /* beq,a,pt %[xi]cc, label0 */
459
- label_ptr = s->code_ptr;
460
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
461
- | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
462
- /* delay slot */
463
- tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
464
- qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
465
+ ldst = prepare_host_addr(s, &h, addr, oi, true);
466
467
- /* TLB Miss. */
468
+ tcg_out_ldst_rr(s, data, h.base, h.index,
469
+ ld_opc[get_memop(oi) & (MO_BSWAP | MO_SSIZE)]);
470
471
- tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
472
-
473
- /* We use the helpers to extend SB and SW data, leaving the case
474
- of SL needing explicit extending below. */
475
- if ((memop & MO_SSIZE) == MO_SL) {
476
- func = qemu_ld_trampoline[MO_UL];
477
- } else {
478
- func = qemu_ld_trampoline[memop & MO_SSIZE];
479
+ if (ldst) {
480
+ ldst->type = data_type;
481
+ ldst->datalo_reg = data;
482
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
483
}
484
- tcg_debug_assert(func != NULL);
485
- tcg_out_call_nodelay(s, func, false);
486
- /* delay slot */
487
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi);
488
-
489
- /* We let the helper sign-extend SB and SW, but leave SL for here. */
490
- if ((memop & MO_SSIZE) == MO_SL) {
491
- tcg_out_ext32s(s, data, TCG_REG_O0);
492
- } else {
493
- tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
494
- }
495
-
496
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
497
-#else
498
- TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0);
499
- unsigned a_bits = get_alignment_bits(memop);
500
- unsigned s_bits = memop & MO_SIZE;
501
- unsigned t_bits;
502
-
503
- if (TARGET_LONG_BITS == 32) {
504
- tcg_out_ext32u(s, TCG_REG_T1, addr);
505
- addr = TCG_REG_T1;
506
- }
507
-
508
- /*
509
- * Normal case: alignment equal to access size.
510
- */
511
- if (a_bits == s_bits) {
512
- tcg_out_ldst_rr(s, data, addr, index,
513
- qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
514
- return;
515
- }
516
-
517
- /*
518
- * Test for at least natural alignment, and assume most accesses
519
- * will be aligned -- perform a straight load in the delay slot.
520
- * This is required to preserve atomicity for aligned accesses.
521
- */
522
- t_bits = MAX(a_bits, s_bits);
523
- tcg_debug_assert(t_bits < 13);
524
- tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
525
-
526
- /* beq,a,pt %icc, label */
527
- label_ptr = s->code_ptr;
528
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
529
- /* delay slot */
530
- tcg_out_ldst_rr(s, data, addr, index,
531
- qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
532
-
533
- if (a_bits >= s_bits) {
534
- /*
535
- * Overalignment: A successful alignment test will perform the memory
536
- * operation in the delay slot, and failure need only invoke the
537
- * handler for SIGBUS.
538
- */
539
- tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false);
540
- /* delay slot -- move to low part of argument reg */
541
- tcg_out_mov_delay(s, TCG_REG_O1, addr);
542
- } else {
543
- /* Underalignment: load by pieces of minimum alignment. */
544
- int ld_opc, a_size, s_size, i;
545
-
546
- /*
547
- * Force full address into T1 early; avoids problems with
548
- * overlap between @addr and @data.
549
- */
550
- tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
551
-
552
- a_size = 1 << a_bits;
553
- s_size = 1 << s_bits;
554
- if ((memop & MO_BSWAP) == MO_BE) {
555
- ld_opc = qemu_ld_opc[a_bits | MO_BE | (memop & MO_SIGN)];
556
- tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
557
- ld_opc = qemu_ld_opc[a_bits | MO_BE];
558
- for (i = a_size; i < s_size; i += a_size) {
559
- tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
560
- tcg_out_arithi(s, data, data, a_size, SHIFT_SLLX);
561
- tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
562
- }
563
- } else if (a_bits == 0) {
564
- ld_opc = LDUB;
565
- tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
566
- for (i = a_size; i < s_size; i += a_size) {
567
- if ((memop & MO_SIGN) && i == s_size - a_size) {
568
- ld_opc = LDSB;
569
- }
570
- tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
571
- tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
572
- tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
573
- }
574
- } else {
575
- ld_opc = qemu_ld_opc[a_bits | MO_LE];
576
- tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, ld_opc);
577
- for (i = a_size; i < s_size; i += a_size) {
578
- tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
579
- if ((memop & MO_SIGN) && i == s_size - a_size) {
580
- ld_opc = qemu_ld_opc[a_bits | MO_LE | MO_SIGN];
581
- }
582
- tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, ld_opc);
583
- tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
584
- tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
585
- }
586
- }
587
- }
588
-
589
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
590
-#endif /* CONFIG_SOFTMMU */
591
}
592
593
static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
594
MemOpIdx oi, TCGType data_type)
595
{
596
- MemOp memop = get_memop(oi);
597
- tcg_insn_unit *label_ptr;
598
+ static const int st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
599
+ [MO_UB] = STB,
600
601
-#ifdef CONFIG_SOFTMMU
602
- unsigned memi = get_mmuidx(oi);
603
- TCGReg addrz;
604
- const tcg_insn_unit *func;
605
+ [MO_BEUW] = STH,
606
+ [MO_BEUL] = STW,
607
+ [MO_BEUQ] = STX,
608
609
- addrz = tcg_out_tlb_load(s, addr, memi, memop,
610
- offsetof(CPUTLBEntry, addr_write));
611
+ [MO_LEUW] = STH_LE,
612
+ [MO_LEUL] = STW_LE,
613
+ [MO_LEUQ] = STX_LE,
614
+ };
615
616
- /* The fast path is exactly one insn. Thus we can perform the entire
617
- TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
618
- /* beq,a,pt %[xi]cc, label0 */
619
- label_ptr = s->code_ptr;
620
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
621
- | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
622
- /* delay slot */
623
- tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
624
- qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
625
+ TCGLabelQemuLdst *ldst;
626
+ HostAddress h;
627
628
- /* TLB Miss. */
629
+ ldst = prepare_host_addr(s, &h, addr, oi, false);
630
631
- tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
632
- tcg_out_movext(s, (memop & MO_SIZE) == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
633
- TCG_REG_O2, data_type, memop & MO_SIZE, data);
634
+ tcg_out_ldst_rr(s, data, h.base, h.index,
635
+ st_opc[get_memop(oi) & (MO_BSWAP | MO_SIZE)]);
636
637
- func = qemu_st_trampoline[memop & MO_SIZE];
638
- tcg_debug_assert(func != NULL);
639
- tcg_out_call_nodelay(s, func, false);
640
- /* delay slot */
641
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi);
642
-
643
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
644
-#else
645
- TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0);
646
- unsigned a_bits = get_alignment_bits(memop);
647
- unsigned s_bits = memop & MO_SIZE;
648
- unsigned t_bits;
649
-
650
- if (TARGET_LONG_BITS == 32) {
651
- tcg_out_ext32u(s, TCG_REG_T1, addr);
652
- addr = TCG_REG_T1;
653
+ if (ldst) {
654
+ ldst->type = data_type;
655
+ ldst->datalo_reg = data;
656
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
657
}
658
-
659
- /*
660
- * Normal case: alignment equal to access size.
661
- */
662
- if (a_bits == s_bits) {
663
- tcg_out_ldst_rr(s, data, addr, index,
664
- qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
665
- return;
666
- }
667
-
668
- /*
669
- * Test for at least natural alignment, and assume most accesses
670
- * will be aligned -- perform a straight store in the delay slot.
671
- * This is required to preserve atomicity for aligned accesses.
672
- */
673
- t_bits = MAX(a_bits, s_bits);
674
- tcg_debug_assert(t_bits < 13);
675
- tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
676
-
677
- /* beq,a,pt %icc, label */
678
- label_ptr = s->code_ptr;
679
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
680
- /* delay slot */
681
- tcg_out_ldst_rr(s, data, addr, index,
682
- qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
683
-
684
- if (a_bits >= s_bits) {
685
- /*
686
- * Overalignment: A successful alignment test will perform the memory
687
- * operation in the delay slot, and failure need only invoke the
688
- * handler for SIGBUS.
689
- */
690
- tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false);
691
- /* delay slot -- move to low part of argument reg */
692
- tcg_out_mov_delay(s, TCG_REG_O1, addr);
693
- } else {
694
- /* Underalignment: store by pieces of minimum alignment. */
695
- int st_opc, a_size, s_size, i;
696
-
697
- /*
698
- * Force full address into T1 early; avoids problems with
699
- * overlap between @addr and @data.
700
- */
701
- tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
702
-
703
- a_size = 1 << a_bits;
704
- s_size = 1 << s_bits;
705
- if ((memop & MO_BSWAP) == MO_BE) {
706
- st_opc = qemu_st_opc[a_bits | MO_BE];
707
- for (i = 0; i < s_size; i += a_size) {
708
- TCGReg d = data;
709
- int shift = (s_size - a_size - i) * 8;
710
- if (shift) {
711
- d = TCG_REG_T2;
712
- tcg_out_arithi(s, d, data, shift, SHIFT_SRLX);
713
- }
714
- tcg_out_ldst(s, d, TCG_REG_T1, i, st_opc);
715
- }
716
- } else if (a_bits == 0) {
717
- tcg_out_ldst(s, data, TCG_REG_T1, 0, STB);
718
- for (i = 1; i < s_size; i++) {
719
- tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX);
720
- tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, STB);
721
- }
722
- } else {
723
- /* Note that ST*A with immediate asi must use indexed address. */
724
- st_opc = qemu_st_opc[a_bits + MO_LE];
725
- tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, st_opc);
726
- for (i = a_size; i < s_size; i += a_size) {
727
- tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX);
728
- tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
729
- tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, st_opc);
730
- }
731
- }
732
- }
733
-
734
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
735
-#endif /* CONFIG_SOFTMMU */
736
}
737
738
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
739
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
740
case INDEX_op_extu_i32_i64:
741
case INDEX_op_extrl_i64_i32:
742
case INDEX_op_extrh_i64_i32:
743
+ case INDEX_op_qemu_ld_i32:
744
+ case INDEX_op_qemu_ld_i64:
745
return C_O1_I1(r, r);
746
747
case INDEX_op_st8_i32:
748
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
749
case INDEX_op_st_i32:
750
case INDEX_op_st32_i64:
751
case INDEX_op_st_i64:
752
+ case INDEX_op_qemu_st_i32:
753
+ case INDEX_op_qemu_st_i64:
754
return C_O0_I2(rZ, r);
755
756
case INDEX_op_add_i32:
757
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
758
case INDEX_op_muluh_i64:
759
return C_O1_I2(r, r, r);
760
761
- case INDEX_op_qemu_ld_i32:
762
- case INDEX_op_qemu_ld_i64:
763
- return C_O1_I1(r, s);
764
- case INDEX_op_qemu_st_i32:
765
- case INDEX_op_qemu_st_i64:
766
- return C_O0_I2(sZ, s);
767
-
768
default:
769
g_assert_not_reached();
770
}
771
--
772
2.34.1
New patch
1
These functions are now unused.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
include/tcg/tcg-ldst.h | 6 ------
7
accel/tcg/user-exec.c | 10 ----------
8
2 files changed, 16 deletions(-)
9
10
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/include/tcg/tcg-ldst.h
13
+++ b/include/tcg/tcg-ldst.h
14
@@ -XXX,XX +XXX,XX @@ void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
15
void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
16
MemOpIdx oi, uintptr_t retaddr);
17
18
-#ifdef CONFIG_USER_ONLY
19
-
20
-G_NORETURN void helper_unaligned_ld(CPUArchState *env, target_ulong addr);
21
-G_NORETURN void helper_unaligned_st(CPUArchState *env, target_ulong addr);
22
-
23
-#endif /* CONFIG_USER_ONLY */
24
#endif /* TCG_LDST_H */
25
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/accel/tcg/user-exec.c
28
+++ b/accel/tcg/user-exec.c
29
@@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong last) { }
30
31
/* The softmmu versions of these helpers are in cputlb.c. */
32
33
-void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
34
-{
35
- cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
36
-}
37
-
38
-void helper_unaligned_st(CPUArchState *env, target_ulong addr)
39
-{
40
- cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
41
-}
42
-
43
static void *cpu_mmu_lookup(CPUArchState *env, abi_ptr addr,
44
MemOp mop, uintptr_t ra, MMUAccessType type)
45
{
46
--
47
2.34.1
New patch
1
This should be true of all loongarch64 hosts running Linux.
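The same bit can be inspected outside of QEMU with ordinary glibc API; a stand-alone sketch, assuming a loongarch64 Linux host (illustrative only, not part of the patch):

    #include <stdio.h>
    #include <sys/auxv.h>
    #include <asm/hwcap.h>      /* HWCAP_LOONGARCH_UAL */

    int main(void)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);

        printf("LoongArch UAL (unaligned access): %s\n",
               (hwcap & HWCAP_LOONGARCH_UAL) ? "present" : "absent");
        return 0;
    }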
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/loongarch64/tcg-target.c.inc | 9 +++++++++
7
1 file changed, 9 insertions(+)
8
9
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/loongarch64/tcg-target.c.inc
12
+++ b/tcg/loongarch64/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@
14
*/
15
16
#include "../tcg-ldst.c.inc"
17
+#include <asm/hwcap.h>
18
19
#ifdef CONFIG_DEBUG_TCG
20
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
21
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
22
23
static void tcg_target_init(TCGContext *s)
24
{
25
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
26
+
27
+ /* Server and desktop class cpus have UAL; embedded cpus do not. */
28
+ if (!(hwcap & HWCAP_LOONGARCH_UAL)) {
29
+ error_report("TCG: unaligned access support required; exiting");
30
+ exit(EXIT_FAILURE);
31
+ }
32
+
33
tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
34
tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
35
36
--
37
2.34.1
New patch
1
Test the final byte of an unaligned access.
2
Use BSTRINS.D to clear the range of bits, rather than AND.
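To make the new comparison concrete, here is a plain-C model of the value that ends up in TMP1 (illustrative only, not QEMU code; BSTRINS.D with the zero register does the bit clearing in a single instruction):

    #include <assert.h>
    #include <stdint.h>

    /* Model of the TLB compare value: for an access with a_bits < s_bits,
     * start from the address of the last byte, then clear bits
     * [a_bits, page_bits), just as the generated BSTRINS.D does. */
    static uint64_t tlb_compare_value(uint64_t addr, unsigned a_bits,
                                      unsigned s_bits, unsigned page_bits)
    {
        uint64_t a_mask = (1ull << a_bits) - 1;
        uint64_t s_mask = (1ull << s_bits) - 1;
        uint64_t v = (a_bits < s_bits) ? addr + (s_mask - a_mask) : addr;
        uint64_t clear = ((1ull << page_bits) - 1) & ~a_mask;

        return v & ~clear;
    }

    int main(void)
    {
        /* An 8-byte access at the last word of a 16 KiB page crosses into
         * the next page, so the compare value no longer matches the page
         * of addr and the TLB compare falls through to the slow path. */
        assert(tlb_compare_value(0x3ffc, 0, 3, 14) != (0x3ffcull & ~0x3fffull));
        return 0;
    }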
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/loongarch64/tcg-target.c.inc | 19 ++++++++++++-------
8
1 file changed, 12 insertions(+), 7 deletions(-)
9
10
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/loongarch64/tcg-target.c.inc
13
+++ b/tcg/loongarch64/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
15
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
16
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
17
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
18
- tcg_target_long compare_mask;
19
20
ldst = new_ldst_label(s);
21
ldst->is_ld = is_ld;
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
23
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
24
offsetof(CPUTLBEntry, addend));
25
26
- /* We don't support unaligned accesses. */
27
+ /*
28
+ * For aligned accesses, we check the first byte and include the alignment
29
+ * bits within the address. For unaligned access, we check that we don't
30
+ * cross pages using the address of the last byte of the access.
31
+ */
32
if (a_bits < s_bits) {
33
- a_bits = s_bits;
34
+ unsigned a_mask = (1u << a_bits) - 1;
35
+ unsigned s_mask = (1u << s_bits) - 1;
36
+ tcg_out_addi(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
37
+ } else {
38
+ tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg);
39
}
40
- /* Clear the non-page, non-alignment bits from the address. */
41
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
42
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
43
- tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
44
+ tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
45
+ a_bits, TARGET_PAGE_BITS - 1);
46
47
/* Compare masked address with the TLB entry. */
48
ldst->label_ptr[0] = s->code_ptr;
49
--
50
2.34.1
New patch
1
The system is required to emulate unaligned accesses, even if the
2
hardware does not support them. The resulting trap may or may not
3
be more efficient than the QEMU slow path. There are Linux kernel
4
patches in flight to allow userspace to query hardware support;
5
we can re-evaluate whether to enable this by default after that.
1
6
7
In the meantime, softmmu now matches useronly, where we already
8
assumed that unaligned accesses are supported.
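The TLB compare uses the same last-byte trick as on loongarch64; a tiny worked example of why that catches page-crossing accesses (numbers are arbitrary, not from the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        const uint64_t page_mask = ~(uint64_t)0xfff;  /* 4 KiB pages */
        uint64_t addr = 0x3ffffffffeull;              /* unaligned 8-byte access */
        uint64_t last = addr + 7;                     /* addr + (s_mask - a_mask) */

        /* First and last byte are on different pages, so the masked compare
         * against the TLB entry looked up for addr fails -> slow path. */
        assert((addr & page_mask) != (last & page_mask));
        return 0;
    }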
9
10
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
tcg/riscv/tcg-target.c.inc | 48 ++++++++++++++++++++++----------------
14
1 file changed, 28 insertions(+), 20 deletions(-)
15
16
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
17
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/riscv/tcg-target.c.inc
19
+++ b/tcg/riscv/tcg-target.c.inc
20
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
21
22
#ifdef CONFIG_SOFTMMU
23
unsigned s_bits = opc & MO_SIZE;
24
+ unsigned s_mask = (1u << s_bits) - 1;
25
int mem_index = get_mmuidx(oi);
26
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
27
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
28
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
29
- TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
30
- tcg_target_long compare_mask;
31
+ int compare_mask;
32
+ TCGReg addr_adj;
33
34
ldst = new_ldst_label(s);
35
ldst->is_ld = is_ld;
36
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
37
38
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
39
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
40
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
41
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
42
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
43
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
44
45
tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
46
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
47
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
48
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
49
50
+ /*
51
+ * For aligned accesses, we check the first byte and include the alignment
52
+ * bits within the address. For unaligned access, we check that we don't
53
+ * cross pages using the address of the last byte of the access.
54
+ */
55
+ addr_adj = addr_reg;
56
+ if (a_bits < s_bits) {
57
+ addr_adj = TCG_REG_TMP0;
58
+ tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
59
+ addr_adj, addr_reg, s_mask - a_mask);
60
+ }
61
+ compare_mask = TARGET_PAGE_MASK | a_mask;
62
+ if (compare_mask == sextreg(compare_mask, 0, 12)) {
63
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
64
+ } else {
65
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
66
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
67
+ }
68
+
69
/* Load the tlb comparator and the addend. */
70
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
71
is_ld ? offsetof(CPUTLBEntry, addr_read)
72
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
73
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
74
offsetof(CPUTLBEntry, addend));
75
76
- /* We don't support unaligned accesses. */
77
- if (a_bits < s_bits) {
78
- a_bits = s_bits;
79
- }
80
- /* Clear the non-page, non-alignment bits from the address. */
81
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | a_mask;
82
- if (compare_mask == sextreg(compare_mask, 0, 12)) {
83
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, compare_mask);
84
- } else {
85
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
86
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
87
- }
88
-
89
/* Compare masked address with the TLB entry. */
90
ldst->label_ptr[0] = s->code_ptr;
91
tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
92
93
/* TLB Hit - translate address using addend. */
94
+ addr_adj = addr_reg;
95
if (TARGET_LONG_BITS == 32) {
96
- tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
97
- addr_reg = TCG_REG_TMP0;
98
+ addr_adj = TCG_REG_TMP0;
99
+ tcg_out_ext32u(s, addr_adj, addr_reg);
100
}
101
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_reg);
102
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_adj);
103
*pbase = TCG_REG_TMP0;
104
#else
105
if (a_mask) {
106
--
107
2.34.1
New patch
1
Replace the unparameterized TCG_TARGET_HAS_MEMORY_BSWAP macro
2
with a function that takes a MemOp argument.
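The point of passing the MemOp is that a backend can now answer per operation rather than globally. A hypothetical example, not taken from this series (fragment only; MemOp, MO_SIZE and MO_64 come from the tcg headers), of a backend that can byte-swap accesses up to 64 bits but not 128-bit ones:

    /* Hypothetical backend policy: per-size instead of a global yes/no. */
    bool tcg_target_has_memory_bswap(MemOp memop)
    {
        return (memop & MO_SIZE) <= MO_64;
    }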
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/aarch64/tcg-target.h | 1 -
8
tcg/arm/tcg-target.h | 1 -
9
tcg/i386/tcg-target.h | 3 ---
10
tcg/loongarch64/tcg-target.h | 2 --
11
tcg/mips/tcg-target.h | 2 --
12
tcg/ppc/tcg-target.h | 1 -
13
tcg/riscv/tcg-target.h | 2 --
14
tcg/s390x/tcg-target.h | 2 --
15
tcg/sparc64/tcg-target.h | 1 -
16
tcg/tcg-internal.h | 2 ++
17
tcg/tci/tcg-target.h | 2 --
18
tcg/tcg-op.c | 20 +++++++++++---------
19
tcg/aarch64/tcg-target.c.inc | 5 +++++
20
tcg/arm/tcg-target.c.inc | 5 +++++
21
tcg/i386/tcg-target.c.inc | 5 +++++
22
tcg/loongarch64/tcg-target.c.inc | 5 +++++
23
tcg/mips/tcg-target.c.inc | 5 +++++
24
tcg/ppc/tcg-target.c.inc | 5 +++++
25
tcg/riscv/tcg-target.c.inc | 5 +++++
26
tcg/s390x/tcg-target.c.inc | 5 +++++
27
tcg/sparc64/tcg-target.c.inc | 5 +++++
28
tcg/tci/tcg-target.c.inc | 5 +++++
29
22 files changed, 63 insertions(+), 26 deletions(-)
30
31
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tcg/aarch64/tcg-target.h
34
+++ b/tcg/aarch64/tcg-target.h
35
@@ -XXX,XX +XXX,XX @@ extern bool have_lse2;
36
#define TCG_TARGET_HAS_cmpsel_vec 0
37
38
#define TCG_TARGET_DEFAULT_MO (0)
39
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
40
#define TCG_TARGET_NEED_LDST_LABELS
41
#define TCG_TARGET_NEED_POOL_LABELS
42
43
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
44
index XXXXXXX..XXXXXXX 100644
45
--- a/tcg/arm/tcg-target.h
46
+++ b/tcg/arm/tcg-target.h
47
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
48
#define TCG_TARGET_HAS_cmpsel_vec 0
49
50
#define TCG_TARGET_DEFAULT_MO (0)
51
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
52
#define TCG_TARGET_NEED_LDST_LABELS
53
#define TCG_TARGET_NEED_POOL_LABELS
54
55
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/tcg/i386/tcg-target.h
58
+++ b/tcg/i386/tcg-target.h
59
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
60
#include "tcg/tcg-mo.h"
61
62
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
63
-
64
-#define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe
65
-
66
#define TCG_TARGET_NEED_LDST_LABELS
67
#define TCG_TARGET_NEED_POOL_LABELS
68
69
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
70
index XXXXXXX..XXXXXXX 100644
71
--- a/tcg/loongarch64/tcg-target.h
72
+++ b/tcg/loongarch64/tcg-target.h
73
@@ -XXX,XX +XXX,XX @@ typedef enum {
74
75
#define TCG_TARGET_NEED_LDST_LABELS
76
77
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
78
-
79
#endif /* LOONGARCH_TCG_TARGET_H */
80
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
81
index XXXXXXX..XXXXXXX 100644
82
--- a/tcg/mips/tcg-target.h
83
+++ b/tcg/mips/tcg-target.h
84
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
85
#endif
86
87
#define TCG_TARGET_DEFAULT_MO 0
88
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
89
-
90
#define TCG_TARGET_NEED_LDST_LABELS
91
92
#endif
93
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
94
index XXXXXXX..XXXXXXX 100644
95
--- a/tcg/ppc/tcg-target.h
96
+++ b/tcg/ppc/tcg-target.h
97
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
98
#define TCG_TARGET_HAS_cmpsel_vec 0
99
100
#define TCG_TARGET_DEFAULT_MO (0)
101
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
102
#define TCG_TARGET_NEED_LDST_LABELS
103
#define TCG_TARGET_NEED_POOL_LABELS
104
105
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
106
index XXXXXXX..XXXXXXX 100644
107
--- a/tcg/riscv/tcg-target.h
108
+++ b/tcg/riscv/tcg-target.h
109
@@ -XXX,XX +XXX,XX @@ typedef enum {
110
#define TCG_TARGET_NEED_LDST_LABELS
111
#define TCG_TARGET_NEED_POOL_LABELS
112
113
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
114
-
115
#endif
116
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
117
index XXXXXXX..XXXXXXX 100644
118
--- a/tcg/s390x/tcg-target.h
119
+++ b/tcg/s390x/tcg-target.h
120
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
121
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
122
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
123
124
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
125
-
126
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
127
#define TCG_TARGET_NEED_LDST_LABELS
128
#define TCG_TARGET_NEED_POOL_LABELS
129
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
130
index XXXXXXX..XXXXXXX 100644
131
--- a/tcg/sparc64/tcg-target.h
132
+++ b/tcg/sparc64/tcg-target.h
133
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
134
#define TCG_AREG0 TCG_REG_I0
135
136
#define TCG_TARGET_DEFAULT_MO (0)
137
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
138
#define TCG_TARGET_NEED_LDST_LABELS
139
#define TCG_TARGET_NEED_POOL_LABELS
140
141
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
142
index XXXXXXX..XXXXXXX 100644
143
--- a/tcg/tcg-internal.h
144
+++ b/tcg/tcg-internal.h
145
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t)
146
return temp_tcgv_i64(tcgv_i128_temp(t) + o);
147
}
148
149
+bool tcg_target_has_memory_bswap(MemOp memop);
150
+
151
#endif /* TCG_INTERNAL_H */
152
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
153
index XXXXXXX..XXXXXXX 100644
154
--- a/tcg/tci/tcg-target.h
155
+++ b/tcg/tci/tcg-target.h
156
@@ -XXX,XX +XXX,XX @@ typedef enum {
157
We prefer consistency across hosts on this. */
158
#define TCG_TARGET_DEFAULT_MO (0)
159
160
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
161
-
162
#endif /* TCG_TARGET_H */
163
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
164
index XXXXXXX..XXXXXXX 100644
165
--- a/tcg/tcg-op.c
166
+++ b/tcg/tcg-op.c
167
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
168
oi = make_memop_idx(memop, idx);
169
170
orig_memop = memop;
171
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
172
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
173
memop &= ~MO_BSWAP;
174
/* The bswap primitive benefits from zero-extended input. */
175
if ((memop & MO_SSIZE) == MO_SW) {
176
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
177
memop = tcg_canonicalize_memop(memop, 0, 1);
178
oi = make_memop_idx(memop, idx);
179
180
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
181
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
182
swap = tcg_temp_ebb_new_i32();
183
switch (memop & MO_SIZE) {
184
case MO_16:
185
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
186
oi = make_memop_idx(memop, idx);
187
188
orig_memop = memop;
189
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
190
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
191
memop &= ~MO_BSWAP;
192
/* The bswap primitive benefits from zero-extended input. */
193
if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
194
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
195
memop = tcg_canonicalize_memop(memop, 1, 1);
196
oi = make_memop_idx(memop, idx);
197
198
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
199
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
200
swap = tcg_temp_ebb_new_i64();
201
switch (memop & MO_SIZE) {
202
case MO_16:
203
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
204
tcg_debug_assert((orig & MO_SIZE) == MO_128);
205
tcg_debug_assert((orig & MO_SIGN) == 0);
206
207
- /* Use a memory ordering implemented by the host. */
208
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
209
- mop_1 &= ~MO_BSWAP;
210
- }
211
-
212
/* Reduce the size to 64-bit. */
213
mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
214
215
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
216
default:
217
g_assert_not_reached();
218
}
219
+
220
+ /* Use a memory ordering implemented by the host. */
221
+ if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
222
+ mop_1 &= ~MO_BSWAP;
223
+ mop_2 &= ~MO_BSWAP;
224
+ }
225
+
226
ret[0] = mop_1;
227
ret[1] = mop_2;
228
}
229
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
230
index XXXXXXX..XXXXXXX 100644
231
--- a/tcg/aarch64/tcg-target.c.inc
232
+++ b/tcg/aarch64/tcg-target.c.inc
233
@@ -XXX,XX +XXX,XX @@ typedef struct {
234
TCGType index_ext;
235
} HostAddress;
236
237
+bool tcg_target_has_memory_bswap(MemOp memop)
238
+{
239
+ return false;
240
+}
241
+
242
static const TCGLdstHelperParam ldst_helper_param = {
243
.ntmp = 1, .tmp = { TCG_REG_TMP }
244
};
245
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
246
index XXXXXXX..XXXXXXX 100644
247
--- a/tcg/arm/tcg-target.c.inc
248
+++ b/tcg/arm/tcg-target.c.inc
249
@@ -XXX,XX +XXX,XX @@ typedef struct {
250
bool index_scratch;
251
} HostAddress;
252
253
+bool tcg_target_has_memory_bswap(MemOp memop)
254
+{
255
+ return false;
256
+}
257
+
258
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
259
{
260
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
261
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
262
index XXXXXXX..XXXXXXX 100644
263
--- a/tcg/i386/tcg-target.c.inc
264
+++ b/tcg/i386/tcg-target.c.inc
265
@@ -XXX,XX +XXX,XX @@ typedef struct {
266
int seg;
267
} HostAddress;
268
269
+bool tcg_target_has_memory_bswap(MemOp memop)
270
+{
271
+ return have_movbe;
272
+}
273
+
274
/*
275
* Because i686 has no register parameters and because x86_64 has xchg
276
* to handle addr/data register overlap, we have placed all input arguments
277
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
278
index XXXXXXX..XXXXXXX 100644
279
--- a/tcg/loongarch64/tcg-target.c.inc
280
+++ b/tcg/loongarch64/tcg-target.c.inc
281
@@ -XXX,XX +XXX,XX @@ typedef struct {
282
TCGReg index;
283
} HostAddress;
284
285
+bool tcg_target_has_memory_bswap(MemOp memop)
286
+{
287
+ return false;
288
+}
289
+
290
/*
291
* For softmmu, perform the TLB load and compare.
292
* For useronly, perform any required alignment tests.
293
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
294
index XXXXXXX..XXXXXXX 100644
295
--- a/tcg/mips/tcg-target.c.inc
296
+++ b/tcg/mips/tcg-target.c.inc
297
@@ -XXX,XX +XXX,XX @@ typedef struct {
298
MemOp align;
299
} HostAddress;
300
301
+bool tcg_target_has_memory_bswap(MemOp memop)
302
+{
303
+ return false;
304
+}
305
+
306
/*
307
* For softmmu, perform the TLB load and compare.
308
* For useronly, perform any required alignment tests.
309
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
310
index XXXXXXX..XXXXXXX 100644
311
--- a/tcg/ppc/tcg-target.c.inc
312
+++ b/tcg/ppc/tcg-target.c.inc
313
@@ -XXX,XX +XXX,XX @@ typedef struct {
314
TCGReg index;
315
} HostAddress;
316
317
+bool tcg_target_has_memory_bswap(MemOp memop)
318
+{
319
+ return true;
320
+}
321
+
322
/*
323
* For softmmu, perform the TLB load and compare.
324
* For useronly, perform any required alignment tests.
325
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
326
index XXXXXXX..XXXXXXX 100644
327
--- a/tcg/riscv/tcg-target.c.inc
328
+++ b/tcg/riscv/tcg-target.c.inc
329
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
330
tcg_debug_assert(ok);
331
}
332
333
+bool tcg_target_has_memory_bswap(MemOp memop)
334
+{
335
+ return false;
336
+}
337
+
338
/* We have three temps, we might as well expose them. */
339
static const TCGLdstHelperParam ldst_helper_param = {
340
.ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
341
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
342
index XXXXXXX..XXXXXXX 100644
343
--- a/tcg/s390x/tcg-target.c.inc
344
+++ b/tcg/s390x/tcg-target.c.inc
345
@@ -XXX,XX +XXX,XX @@ typedef struct {
346
int disp;
347
} HostAddress;
348
349
+bool tcg_target_has_memory_bswap(MemOp memop)
350
+{
351
+ return true;
352
+}
353
+
354
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
355
HostAddress h)
356
{
357
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
358
index XXXXXXX..XXXXXXX 100644
359
--- a/tcg/sparc64/tcg-target.c.inc
360
+++ b/tcg/sparc64/tcg-target.c.inc
361
@@ -XXX,XX +XXX,XX @@ typedef struct {
362
TCGReg index;
363
} HostAddress;
364
365
+bool tcg_target_has_memory_bswap(MemOp memop)
366
+{
367
+ return true;
368
+}
369
+
370
/*
371
* For softmmu, perform the TLB load and compare.
372
* For useronly, perform any required alignment tests.
373
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
374
index XXXXXXX..XXXXXXX 100644
375
--- a/tcg/tci/tcg-target.c.inc
376
+++ b/tcg/tci/tcg-target.c.inc
377
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
378
static inline void tcg_target_qemu_prologue(TCGContext *s)
379
{
380
}
381
+
382
+bool tcg_target_has_memory_bswap(MemOp memop)
383
+{
384
+ return true;
385
+}
386
--
387
2.34.1
New patch
1
Add opcodes for backend support for 128-bit memory operations.
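For context, a guest front end reaches these opcodes through the existing i128 load/store interface; a minimal sketch of the call site (fragment only; mem_idx and the MO_ALIGN choice stand in for whatever the translator uses):

    /* Sketch: request a 128-bit little-endian guest load.  When the backend
     * sets TCG_TARGET_HAS_qemu_ldst_i128, tcg_gen_qemu_ld_i128() now emits
     * INDEX_op_qemu_ld_i128 instead of splitting into two 64-bit accesses. */
    TCGv_i128 t = tcg_temp_new_i128();
    tcg_gen_qemu_ld_i128(t, addr, mem_idx, MO_LE | MO_128 | MO_ALIGN);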
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
docs/devel/tcg-ops.rst | 11 +++---
8
include/tcg/tcg-opc.h | 8 +++++
9
tcg/aarch64/tcg-target.h | 2 ++
10
tcg/arm/tcg-target.h | 2 ++
11
tcg/i386/tcg-target.h | 2 ++
12
tcg/loongarch64/tcg-target.h | 1 +
13
tcg/mips/tcg-target.h | 2 ++
14
tcg/ppc/tcg-target.h | 2 ++
15
tcg/riscv/tcg-target.h | 2 ++
16
tcg/s390x/tcg-target.h | 2 ++
17
tcg/sparc64/tcg-target.h | 2 ++
18
tcg/tci/tcg-target.h | 2 ++
19
tcg/tcg-op.c | 69 ++++++++++++++++++++++++++++++++----
20
tcg/tcg.c | 6 ++++
21
14 files changed, 103 insertions(+), 10 deletions(-)
22
23
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst
24
index XXXXXXX..XXXXXXX 100644
25
--- a/docs/devel/tcg-ops.rst
26
+++ b/docs/devel/tcg-ops.rst
27
@@ -XXX,XX +XXX,XX @@ QEMU specific operations
28
| This operation is optional. If the TCG backend does not implement the
29
goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
30
31
- * - qemu_ld_i32/i64 *t0*, *t1*, *flags*, *memidx*
32
+ * - qemu_ld_i32/i64/i128 *t0*, *t1*, *flags*, *memidx*
33
34
- qemu_st_i32/i64 *t0*, *t1*, *flags*, *memidx*
35
+ qemu_st_i32/i64/i128 *t0*, *t1*, *flags*, *memidx*
36
37
qemu_st8_i32 *t0*, *t1*, *flags*, *memidx*
38
39
- | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest
40
- address *t1*. The _i32/_i64 size applies to the size of the input/output
41
+ address *t1*. The _i32/_i64/_i128 size applies to the size of the input/output
42
register *t0* only. The address *t1* is always sized according to the guest,
43
and the width of the memory operation is controlled by *flags*.
44
|
45
| Both *t0* and *t1* may be split into little-endian ordered pairs of registers
46
- if dealing with 64-bit quantities on a 32-bit host.
47
+ if dealing with 64-bit quantities on a 32-bit host, or 128-bit quantities on
48
+ a 64-bit host.
49
|
50
| The *memidx* selects the qemu tlb index to use (e.g. user or kernel access).
51
The flags are the MemOp bits, selecting the sign, width, and endianness
52
@@ -XXX,XX +XXX,XX @@ QEMU specific operations
53
| For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
54
64-bit memory access specified in *flags*.
55
|
56
+ | For qemu_ld/st_i128, these are only supported for a 64-bit host.
57
+ |
58
| For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
59
the memory operation is known to be 8-bit. This allows the backend to
60
provide a different set of register constraints.
61
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
62
index XXXXXXX..XXXXXXX 100644
63
--- a/include/tcg/tcg-opc.h
64
+++ b/include/tcg/tcg-opc.h
65
@@ -XXX,XX +XXX,XX @@ DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
66
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
67
IMPL(TCG_TARGET_HAS_qemu_st8_i32))
68
69
+/* Only for 64-bit hosts at the moment. */
70
+DEF(qemu_ld_i128, 2, 1, 1,
71
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
72
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
73
+DEF(qemu_st_i128, 0, 3, 1,
74
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
75
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
76
+
77
/* Host vector support. */
78
79
#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
80
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
81
index XXXXXXX..XXXXXXX 100644
82
--- a/tcg/aarch64/tcg-target.h
83
+++ b/tcg/aarch64/tcg-target.h
84
@@ -XXX,XX +XXX,XX @@ extern bool have_lse2;
85
#define TCG_TARGET_HAS_muluh_i64 1
86
#define TCG_TARGET_HAS_mulsh_i64 1
87
88
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
89
+
90
#define TCG_TARGET_HAS_v64 1
91
#define TCG_TARGET_HAS_v128 1
92
#define TCG_TARGET_HAS_v256 0
93
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
94
index XXXXXXX..XXXXXXX 100644
95
--- a/tcg/arm/tcg-target.h
96
+++ b/tcg/arm/tcg-target.h
97
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
98
#define TCG_TARGET_HAS_rem_i32 0
99
#define TCG_TARGET_HAS_qemu_st8_i32 0
100
101
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
102
+
103
#define TCG_TARGET_HAS_v64 use_neon_instructions
104
#define TCG_TARGET_HAS_v128 use_neon_instructions
105
#define TCG_TARGET_HAS_v256 0
106
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
107
index XXXXXXX..XXXXXXX 100644
108
--- a/tcg/i386/tcg-target.h
109
+++ b/tcg/i386/tcg-target.h
110
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
111
#define TCG_TARGET_HAS_qemu_st8_i32 1
112
#endif
113
114
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
115
+
116
/* We do not support older SSE systems, only beginning with AVX1. */
117
#define TCG_TARGET_HAS_v64 have_avx1
118
#define TCG_TARGET_HAS_v128 have_avx1
119
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
120
index XXXXXXX..XXXXXXX 100644
121
--- a/tcg/loongarch64/tcg-target.h
122
+++ b/tcg/loongarch64/tcg-target.h
123
@@ -XXX,XX +XXX,XX @@ typedef enum {
124
#define TCG_TARGET_HAS_muls2_i64 0
125
#define TCG_TARGET_HAS_muluh_i64 1
126
#define TCG_TARGET_HAS_mulsh_i64 1
127
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
128
129
#define TCG_TARGET_DEFAULT_MO (0)
130
131
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
132
index XXXXXXX..XXXXXXX 100644
133
--- a/tcg/mips/tcg-target.h
134
+++ b/tcg/mips/tcg-target.h
135
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
136
#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
137
#endif
138
139
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
140
+
141
#define TCG_TARGET_DEFAULT_MO 0
142
#define TCG_TARGET_NEED_LDST_LABELS
143
144
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
145
index XXXXXXX..XXXXXXX 100644
146
--- a/tcg/ppc/tcg-target.h
147
+++ b/tcg/ppc/tcg-target.h
148
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
149
#define TCG_TARGET_HAS_mulsh_i64 1
150
#endif
151
152
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
153
+
154
/*
155
* While technically Altivec could support V64, it has no 64-bit store
156
* instruction and substituting two 32-bit stores makes the generated
157
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
158
index XXXXXXX..XXXXXXX 100644
159
--- a/tcg/riscv/tcg-target.h
160
+++ b/tcg/riscv/tcg-target.h
161
@@ -XXX,XX +XXX,XX @@ typedef enum {
162
#define TCG_TARGET_HAS_muluh_i64 1
163
#define TCG_TARGET_HAS_mulsh_i64 1
164
165
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
166
+
167
#define TCG_TARGET_DEFAULT_MO (0)
168
169
#define TCG_TARGET_NEED_LDST_LABELS
170
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
171
index XXXXXXX..XXXXXXX 100644
172
--- a/tcg/s390x/tcg-target.h
173
+++ b/tcg/s390x/tcg-target.h
174
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
175
#define TCG_TARGET_HAS_muluh_i64 0
176
#define TCG_TARGET_HAS_mulsh_i64 0
177
178
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
179
+
180
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
181
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
182
#define TCG_TARGET_HAS_v256 0
183
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
184
index XXXXXXX..XXXXXXX 100644
185
--- a/tcg/sparc64/tcg-target.h
186
+++ b/tcg/sparc64/tcg-target.h
187
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
188
#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions
189
#define TCG_TARGET_HAS_mulsh_i64 0
190
191
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
192
+
193
#define TCG_AREG0 TCG_REG_I0
194
195
#define TCG_TARGET_DEFAULT_MO (0)
196
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
197
index XXXXXXX..XXXXXXX 100644
198
--- a/tcg/tci/tcg-target.h
199
+++ b/tcg/tci/tcg-target.h
200
@@ -XXX,XX +XXX,XX @@
201
#define TCG_TARGET_HAS_mulu2_i32 1
202
#endif /* TCG_TARGET_REG_BITS == 64 */
203
204
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
205
+
206
/* Number of registers available. */
207
#define TCG_TARGET_NB_REGS 16
208
209
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
210
index XXXXXXX..XXXXXXX 100644
211
--- a/tcg/tcg-op.c
212
+++ b/tcg/tcg-op.c
213
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
214
215
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
216
{
217
- MemOpIdx oi = make_memop_idx(memop, idx);
218
+ const MemOpIdx oi = make_memop_idx(memop, idx);
219
220
tcg_debug_assert((memop & MO_SIZE) == MO_128);
221
tcg_debug_assert((memop & MO_SIGN) == 0);
222
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
223
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
224
addr = plugin_prep_mem_callbacks(addr);
225
226
- /* TODO: allow the tcg backend to see the whole operation. */
227
+ /* TODO: For now, force 32-bit hosts to use the helper. */
228
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
229
+ TCGv_i64 lo, hi;
230
+ TCGArg addr_arg;
231
+ MemOpIdx adj_oi;
232
+ bool need_bswap = false;
233
234
- if (use_two_i64_for_i128(memop)) {
235
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
236
+ lo = TCGV128_HIGH(val);
237
+ hi = TCGV128_LOW(val);
238
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
239
+ need_bswap = true;
240
+ } else {
241
+ lo = TCGV128_LOW(val);
242
+ hi = TCGV128_HIGH(val);
243
+ adj_oi = oi;
244
+ }
245
+
246
+#if TARGET_LONG_BITS == 32
247
+ addr_arg = tcgv_i32_arg(addr);
248
+#else
249
+ addr_arg = tcgv_i64_arg(addr);
250
+#endif
251
+ tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
252
+
253
+ if (need_bswap) {
254
+ tcg_gen_bswap64_i64(lo, lo);
255
+ tcg_gen_bswap64_i64(hi, hi);
256
+ }
257
+ } else if (use_two_i64_for_i128(memop)) {
258
MemOp mop[2];
259
TCGv addr_p8;
260
TCGv_i64 x, y;
261
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
262
263
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
264
{
265
- MemOpIdx oi = make_memop_idx(memop, idx);
266
+ const MemOpIdx oi = make_memop_idx(memop, idx);
267
268
tcg_debug_assert((memop & MO_SIZE) == MO_128);
269
tcg_debug_assert((memop & MO_SIGN) == 0);
270
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
271
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
272
addr = plugin_prep_mem_callbacks(addr);
273
274
- /* TODO: allow the tcg backend to see the whole operation. */
275
+ /* TODO: For now, force 32-bit hosts to use the helper. */
276
277
- if (use_two_i64_for_i128(memop)) {
278
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
279
+ TCGv_i64 lo, hi;
280
+ TCGArg addr_arg;
281
+ MemOpIdx adj_oi;
282
+ bool need_bswap = false;
283
+
284
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
285
+ lo = tcg_temp_new_i64();
286
+ hi = tcg_temp_new_i64();
287
+ tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
288
+ tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
289
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
290
+ need_bswap = true;
291
+ } else {
292
+ lo = TCGV128_LOW(val);
293
+ hi = TCGV128_HIGH(val);
294
+ adj_oi = oi;
295
+ }
296
+
297
+#if TARGET_LONG_BITS == 32
298
+ addr_arg = tcgv_i32_arg(addr);
299
+#else
300
+ addr_arg = tcgv_i64_arg(addr);
301
+#endif
302
+ tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
303
+
304
+ if (need_bswap) {
305
+ tcg_temp_free_i64(lo);
306
+ tcg_temp_free_i64(hi);
307
+ }
308
+ } else if (use_two_i64_for_i128(memop)) {
309
MemOp mop[2];
310
TCGv addr_p8;
311
TCGv_i64 x, y;
312
diff --git a/tcg/tcg.c b/tcg/tcg.c
313
index XXXXXXX..XXXXXXX 100644
314
--- a/tcg/tcg.c
315
+++ b/tcg/tcg.c
316
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
317
case INDEX_op_qemu_st8_i32:
318
return TCG_TARGET_HAS_qemu_st8_i32;
319
320
+ case INDEX_op_qemu_ld_i128:
321
+ case INDEX_op_qemu_st_i128:
322
+ return TCG_TARGET_HAS_qemu_ldst_i128;
323
+
324
case INDEX_op_mov_i32:
325
case INDEX_op_setcond_i32:
326
case INDEX_op_brcond_i32:
327
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
328
case INDEX_op_qemu_st8_i32:
329
case INDEX_op_qemu_ld_i64:
330
case INDEX_op_qemu_st_i64:
331
+ case INDEX_op_qemu_ld_i128:
332
+ case INDEX_op_qemu_st_i128:
333
{
334
const char *s_al, *s_op, *s_at;
335
MemOpIdx oi = op->args[k++];
336
--
337
2.34.1
338
339
New patch
1
With x86_64 as host, we do not have any temporaries with which to
2
resolve cycles, but we do have xchg. As a side bonus, the set of
3
graphs that can be made with 3 nodes and all nodes conflicting is
4
small: two. We can solve the cycle with a single temp.
1
5
6
This is required for x86_64 to handle stores of i128: 1 address
7
register and 2 data registers.
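
Purely as an illustration of the cycle case (not part of the change itself): a standalone C sketch of the two strategies, two exchanges when the host has xchg, or one scratch register otherwise. The names here are invented for the example.

/* Standalone sketch: perform r1 <- r2, r2 <- r3, r3 <- r1 "in parallel". */
#include <assert.h>

static void swap(int *x, int *y) { int t = *x; *x = *y; *y = t; }

/* Two exchanges resolve the cycle without any extra storage. */
static void rotate_with_xchg(int *r1, int *r2, int *r3)
{
    swap(r1, r2);               /* r1 = old r2 */
    swap(r2, r3);               /* r2 = old r3, r3 = old r1 */
}

/* Otherwise one scratch register is enough. */
static void rotate_with_scratch(int *r1, int *r2, int *r3)
{
    int scratch = *r1;          /* save the value about to be clobbered */
    *r1 = *r2;
    *r2 = *r3;
    *r3 = scratch;
}

int main(void)
{
    int a = 1, b = 2, c = 3;

    rotate_with_xchg(&a, &b, &c);
    assert(a == 2 && b == 3 && c == 1);

    rotate_with_scratch(&a, &b, &c);
    assert(a == 3 && b == 1 && c == 2);
    return 0;
}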
8
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
tcg/tcg.c | 138 ++++++++++++++++++++++++++++++++++++++++++------------
13
1 file changed, 108 insertions(+), 30 deletions(-)
14
15
diff --git a/tcg/tcg.c b/tcg/tcg.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/tcg.c
18
+++ b/tcg/tcg.c
19
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
20
tcg_out_movext1_new_src(s, i1, src1);
21
}
22
23
+/**
24
+ * tcg_out_movext3 -- move and extend three pair
25
+ * @s: tcg context
26
+ * @i1: first move description
27
+ * @i2: second move description
28
+ * @i3: third move description
29
+ * @scratch: temporary register, or -1 for none
30
+ *
31
+ * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
32
+ * between the sources and destinations.
33
+ */
34
+
35
+static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
36
+ const TCGMovExtend *i2, const TCGMovExtend *i3,
37
+ int scratch)
38
+{
39
+ TCGReg src1 = i1->src;
40
+ TCGReg src2 = i2->src;
41
+ TCGReg src3 = i3->src;
42
+
43
+ if (i1->dst != src2 && i1->dst != src3) {
44
+ tcg_out_movext1(s, i1);
45
+ tcg_out_movext2(s, i2, i3, scratch);
46
+ return;
47
+ }
48
+ if (i2->dst != src1 && i2->dst != src3) {
49
+ tcg_out_movext1(s, i2);
50
+ tcg_out_movext2(s, i1, i3, scratch);
51
+ return;
52
+ }
53
+ if (i3->dst != src1 && i3->dst != src2) {
54
+ tcg_out_movext1(s, i3);
55
+ tcg_out_movext2(s, i1, i2, scratch);
56
+ return;
57
+ }
58
+
59
+ /*
60
+ * There is a cycle. Since there are only 3 nodes, the cycle is
61
+ * either "clockwise" or "anti-clockwise", and can be solved with
62
+ * a single scratch or two xchg.
63
+ */
64
+ if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
65
+ /* "Clockwise" */
66
+ if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
67
+ tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
68
+ /* The data is now in the correct registers, now extend. */
69
+ tcg_out_movext1_new_src(s, i1, i1->dst);
70
+ tcg_out_movext1_new_src(s, i2, i2->dst);
71
+ tcg_out_movext1_new_src(s, i3, i3->dst);
72
+ } else {
73
+ tcg_debug_assert(scratch >= 0);
74
+ tcg_out_mov(s, i1->src_type, scratch, src1);
75
+ tcg_out_movext1(s, i3);
76
+ tcg_out_movext1(s, i2);
77
+ tcg_out_movext1_new_src(s, i1, scratch);
78
+ }
79
+ } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
80
+ /* "Anti-clockwise" */
81
+ if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
82
+ tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
83
+ /* The data is now in the correct registers, now extend. */
84
+ tcg_out_movext1_new_src(s, i1, i1->dst);
85
+ tcg_out_movext1_new_src(s, i2, i2->dst);
86
+ tcg_out_movext1_new_src(s, i3, i3->dst);
87
+ } else {
88
+ tcg_debug_assert(scratch >= 0);
89
+ tcg_out_mov(s, i1->src_type, scratch, src1);
90
+ tcg_out_movext1(s, i2);
91
+ tcg_out_movext1(s, i3);
92
+ tcg_out_movext1_new_src(s, i1, scratch);
93
+ }
94
+ } else {
95
+ g_assert_not_reached();
96
+ }
97
+}
98
+
99
#define C_PFX1(P, A) P##A
100
#define C_PFX2(P, A, B) P##A##_##B
101
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
102
@@ -XXX,XX +XXX,XX @@ static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
103
104
static void tcg_out_helper_load_regs(TCGContext *s,
105
unsigned nmov, TCGMovExtend *mov,
106
- unsigned ntmp, const int *tmp)
107
+ const TCGLdstHelperParam *parm)
108
{
109
+ TCGReg dst3;
110
+
111
switch (nmov) {
112
- default:
113
+ case 4:
114
/* The backend must have provided enough temps for the worst case. */
115
- tcg_debug_assert(ntmp + 1 >= nmov);
116
+ tcg_debug_assert(parm->ntmp >= 2);
117
118
- for (unsigned i = nmov - 1; i >= 2; --i) {
119
- TCGReg dst = mov[i].dst;
120
+ dst3 = mov[3].dst;
121
+ for (unsigned j = 0; j < 3; ++j) {
122
+ if (dst3 == mov[j].src) {
123
+ /*
124
+ * Conflict. Copy the source to a temporary, perform the
125
+ * remaining moves, then the extension from our scratch
126
+ * on the way out.
127
+ */
128
+ TCGReg scratch = parm->tmp[1];
129
130
- for (unsigned j = 0; j < i; ++j) {
131
- if (dst == mov[j].src) {
132
- /*
133
- * Conflict.
134
- * Copy the source to a temporary, recurse for the
135
- * remaining moves, perform the extension from our
136
- * scratch on the way out.
137
- */
138
- TCGReg scratch = tmp[--ntmp];
139
- tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
140
- mov[i].src = scratch;
141
-
142
- tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
143
- tcg_out_movext1(s, &mov[i]);
144
- return;
145
- }
146
+ tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
147
+ tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
148
+ tcg_out_movext1_new_src(s, &mov[3], scratch);
149
+ break;
150
}
151
-
152
- /* No conflicts: perform this move and continue. */
153
- tcg_out_movext1(s, &mov[i]);
154
}
155
- /* fall through for the final two moves */
156
157
+ /* No conflicts: perform this move and continue. */
158
+ tcg_out_movext1(s, &mov[3]);
159
+ /* fall through */
160
+
161
+ case 3:
162
+ tcg_out_movext3(s, mov, mov + 1, mov + 2,
163
+ parm->ntmp ? parm->tmp[0] : -1);
164
+ break;
165
case 2:
166
- tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
167
- return;
168
+ tcg_out_movext2(s, mov, mov + 1,
169
+ parm->ntmp ? parm->tmp[0] : -1);
170
+ break;
171
case 1:
172
tcg_out_movext1(s, mov);
173
- return;
174
- case 0:
175
+ break;
176
+ default:
177
g_assert_not_reached();
178
}
179
}
180
@@ -XXX,XX +XXX,XX @@ static void tcg_out_helper_load_slots(TCGContext *s,
181
for (i = 0; i < nmov; ++i) {
182
mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
183
}
184
- tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
185
+ tcg_out_helper_load_regs(s, nmov, mov, parm);
186
}
187
188
static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
189
--
190
2.34.1
New patch
1
Now that tcg_out_helper_load_regs is not recursive, we can
2
merge it into its only caller, tcg_out_helper_load_slots.
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/tcg.c | 89 +++++++++++++++++++++++++------------------------------
8
1 file changed, 41 insertions(+), 48 deletions(-)
9
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg.c
13
+++ b/tcg/tcg.c
14
@@ -XXX,XX +XXX,XX @@ static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
15
return ofs;
16
}
17
18
-static void tcg_out_helper_load_regs(TCGContext *s,
19
- unsigned nmov, TCGMovExtend *mov,
20
- const TCGLdstHelperParam *parm)
21
+static void tcg_out_helper_load_slots(TCGContext *s,
22
+ unsigned nmov, TCGMovExtend *mov,
23
+ const TCGLdstHelperParam *parm)
24
{
25
+ unsigned i;
26
TCGReg dst3;
27
28
+ /*
29
+ * Start from the end, storing to the stack first.
30
+ * This frees those registers, so we need not consider overlap.
31
+ */
32
+ for (i = nmov; i-- > 0; ) {
33
+ unsigned slot = mov[i].dst;
34
+
35
+ if (arg_slot_reg_p(slot)) {
36
+ goto found_reg;
37
+ }
38
+
39
+ TCGReg src = mov[i].src;
40
+ TCGType dst_type = mov[i].dst_type;
41
+ MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
42
+
43
+ /* The argument is going onto the stack; extend into scratch. */
44
+ if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
45
+ tcg_debug_assert(parm->ntmp != 0);
46
+ mov[i].dst = src = parm->tmp[0];
47
+ tcg_out_movext1(s, &mov[i]);
48
+ }
49
+
50
+ tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
51
+ tcg_out_helper_stk_ofs(dst_type, slot));
52
+ }
53
+ return;
54
+
55
+ found_reg:
56
+ /*
57
+ * The remaining arguments are in registers.
58
+ * Convert slot numbers to argument registers.
59
+ */
60
+ nmov = i + 1;
61
+ for (i = 0; i < nmov; ++i) {
62
+ mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
63
+ }
64
+
65
switch (nmov) {
66
case 4:
67
/* The backend must have provided enough temps for the worst case. */
68
@@ -XXX,XX +XXX,XX @@ static void tcg_out_helper_load_regs(TCGContext *s,
69
}
70
}
71
72
-static void tcg_out_helper_load_slots(TCGContext *s,
73
- unsigned nmov, TCGMovExtend *mov,
74
- const TCGLdstHelperParam *parm)
75
-{
76
- unsigned i;
77
-
78
- /*
79
- * Start from the end, storing to the stack first.
80
- * This frees those registers, so we need not consider overlap.
81
- */
82
- for (i = nmov; i-- > 0; ) {
83
- unsigned slot = mov[i].dst;
84
-
85
- if (arg_slot_reg_p(slot)) {
86
- goto found_reg;
87
- }
88
-
89
- TCGReg src = mov[i].src;
90
- TCGType dst_type = mov[i].dst_type;
91
- MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
92
-
93
- /* The argument is going onto the stack; extend into scratch. */
94
- if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
95
- tcg_debug_assert(parm->ntmp != 0);
96
- mov[i].dst = src = parm->tmp[0];
97
- tcg_out_movext1(s, &mov[i]);
98
- }
99
-
100
- tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
101
- tcg_out_helper_stk_ofs(dst_type, slot));
102
- }
103
- return;
104
-
105
- found_reg:
106
- /*
107
- * The remaining arguments are in registers.
108
- * Convert slot numbers to argument registers.
109
- */
110
- nmov = i + 1;
111
- for (i = 0; i < nmov; ++i) {
112
- mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
113
- }
114
- tcg_out_helper_load_regs(s, nmov, mov, parm);
115
-}
116
-
117
static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
118
TCGType type, tcg_target_long imm,
119
const TCGLdstHelperParam *parm)
120
--
121
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/tcg.c | 196 +++++++++++++++++++++++++++++++++++++++++++++---------
5
1 file changed, 163 insertions(+), 33 deletions(-)
1
6
7
diff --git a/tcg/tcg.c b/tcg/tcg.c
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/tcg.c
10
+++ b/tcg/tcg.c
11
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
12
[MO_UQ] = helper_ldq_mmu,
13
#if TCG_TARGET_REG_BITS == 64
14
[MO_SL] = helper_ldsl_mmu,
15
+ [MO_128] = helper_ld16_mmu,
16
#endif
17
};
18
19
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
20
[MO_16] = helper_stw_mmu,
21
[MO_32] = helper_stl_mmu,
22
[MO_64] = helper_stq_mmu,
23
+#if TCG_TARGET_REG_BITS == 64
24
+ [MO_128] = helper_st16_mmu,
25
+#endif
26
};
27
28
TCGContext tcg_init_ctx;
29
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld64_mmu = {
30
| dh_typemask(ptr, 4) /* uintptr_t ra */
31
};
32
33
+static TCGHelperInfo info_helper_ld128_mmu = {
34
+ .flags = TCG_CALL_NO_WG,
35
+ .typemask = dh_typemask(i128, 0) /* return Int128 */
36
+ | dh_typemask(env, 1)
37
+ | dh_typemask(tl, 2) /* target_ulong addr */
38
+ | dh_typemask(i32, 3) /* unsigned oi */
39
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
40
+};
41
+
42
static TCGHelperInfo info_helper_st32_mmu = {
43
.flags = TCG_CALL_NO_WG,
44
.typemask = dh_typemask(void, 0)
45
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st64_mmu = {
46
| dh_typemask(ptr, 5) /* uintptr_t ra */
47
};
48
49
+static TCGHelperInfo info_helper_st128_mmu = {
50
+ .flags = TCG_CALL_NO_WG,
51
+ .typemask = dh_typemask(void, 0)
52
+ | dh_typemask(env, 1)
53
+ | dh_typemask(tl, 2) /* target_ulong addr */
54
+ | dh_typemask(i128, 3) /* Int128 data */
55
+ | dh_typemask(i32, 4) /* unsigned oi */
56
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
57
+};
58
+
59
#ifdef CONFIG_TCG_INTERPRETER
60
static ffi_type *typecode_to_ffi(int argmask)
61
{
62
@@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus)
63
64
init_call_layout(&info_helper_ld32_mmu);
65
init_call_layout(&info_helper_ld64_mmu);
66
+ init_call_layout(&info_helper_ld128_mmu);
67
init_call_layout(&info_helper_st32_mmu);
68
init_call_layout(&info_helper_st64_mmu);
69
+ init_call_layout(&info_helper_st128_mmu);
70
71
#ifdef CONFIG_TCG_INTERPRETER
72
init_ffi_layouts();
73
@@ -XXX,XX +XXX,XX @@ static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
74
TCGType dst_type, TCGType src_type,
75
TCGReg lo, TCGReg hi)
76
{
77
+ MemOp reg_mo;
78
+
79
if (dst_type <= TCG_TYPE_REG) {
80
MemOp src_ext;
81
82
@@ -XXX,XX +XXX,XX @@ static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
83
return 1;
84
}
85
86
- assert(TCG_TARGET_REG_BITS == 32);
87
+ if (TCG_TARGET_REG_BITS == 32) {
88
+ assert(dst_type == TCG_TYPE_I64);
89
+ reg_mo = MO_32;
90
+ } else {
91
+ assert(dst_type == TCG_TYPE_I128);
92
+ reg_mo = MO_64;
93
+ }
94
95
mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
96
mov[0].src = lo;
97
- mov[0].dst_type = TCG_TYPE_I32;
98
- mov[0].src_type = TCG_TYPE_I32;
99
- mov[0].src_ext = MO_32;
100
+ mov[0].dst_type = TCG_TYPE_REG;
101
+ mov[0].src_type = TCG_TYPE_REG;
102
+ mov[0].src_ext = reg_mo;
103
104
mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
105
mov[1].src = hi;
106
- mov[1].dst_type = TCG_TYPE_I32;
107
- mov[1].src_type = TCG_TYPE_I32;
108
- mov[1].src_ext = MO_32;
109
+ mov[1].dst_type = TCG_TYPE_REG;
110
+ mov[1].src_type = TCG_TYPE_REG;
111
+ mov[1].src_ext = reg_mo;
112
113
return 2;
114
}
115
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
116
case MO_64:
117
info = &info_helper_ld64_mmu;
118
break;
119
+ case MO_128:
120
+ info = &info_helper_ld128_mmu;
121
+ break;
122
default:
123
g_assert_not_reached();
124
}
125
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
126
127
tcg_out_helper_load_slots(s, nmov, mov, parm);
128
129
- /* No special attention for 32 and 64-bit return values. */
130
- tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
131
+ switch (info->out_kind) {
132
+ case TCG_CALL_RET_NORMAL:
133
+ case TCG_CALL_RET_BY_VEC:
134
+ break;
135
+ case TCG_CALL_RET_BY_REF:
136
+ /*
137
+ * The return reference is in the first argument slot.
138
+ * We need memory in which to return: re-use the top of stack.
139
+ */
140
+ {
141
+ int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
142
+
143
+ if (arg_slot_reg_p(0)) {
144
+ tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
145
+ TCG_REG_CALL_STACK, ofs_slot0);
146
+ } else {
147
+ tcg_debug_assert(parm->ntmp != 0);
148
+ tcg_out_addi_ptr(s, parm->tmp[0],
149
+ TCG_REG_CALL_STACK, ofs_slot0);
150
+ tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
151
+ TCG_REG_CALL_STACK, ofs_slot0);
152
+ }
153
+ }
154
+ break;
155
+ default:
156
+ g_assert_not_reached();
157
+ }
158
159
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
160
}
161
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
162
bool load_sign,
163
const TCGLdstHelperParam *parm)
164
{
165
+ MemOp mop = get_memop(ldst->oi);
166
TCGMovExtend mov[2];
167
+ int ofs_slot0;
168
169
- if (ldst->type <= TCG_TYPE_REG) {
170
- MemOp mop = get_memop(ldst->oi);
171
+ switch (ldst->type) {
172
+ case TCG_TYPE_I64:
173
+ if (TCG_TARGET_REG_BITS == 32) {
174
+ break;
175
+ }
176
+ /* fall through */
177
178
+ case TCG_TYPE_I32:
179
mov[0].dst = ldst->datalo_reg;
180
mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
181
mov[0].dst_type = ldst->type;
182
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
183
mov[0].src_ext = mop & MO_SSIZE;
184
}
185
tcg_out_movext1(s, mov);
186
- } else {
187
- assert(TCG_TARGET_REG_BITS == 32);
188
+ return;
189
190
- mov[0].dst = ldst->datalo_reg;
191
- mov[0].src =
192
- tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
193
- mov[0].dst_type = TCG_TYPE_I32;
194
- mov[0].src_type = TCG_TYPE_I32;
195
- mov[0].src_ext = MO_32;
196
+ case TCG_TYPE_I128:
197
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
198
+ ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
199
+ switch (TCG_TARGET_CALL_RET_I128) {
200
+ case TCG_CALL_RET_NORMAL:
201
+ break;
202
+ case TCG_CALL_RET_BY_VEC:
203
+ tcg_out_st(s, TCG_TYPE_V128,
204
+ tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
205
+ TCG_REG_CALL_STACK, ofs_slot0);
206
+ /* fall through */
207
+ case TCG_CALL_RET_BY_REF:
208
+ tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
209
+ TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
210
+ tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
211
+ TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
212
+ return;
213
+ default:
214
+ g_assert_not_reached();
215
+ }
216
+ break;
217
218
- mov[1].dst = ldst->datahi_reg;
219
- mov[1].src =
220
- tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
221
- mov[1].dst_type = TCG_TYPE_REG;
222
- mov[1].src_type = TCG_TYPE_REG;
223
- mov[1].src_ext = MO_32;
224
-
225
- tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
226
+ default:
227
+ g_assert_not_reached();
228
}
229
+
230
+ mov[0].dst = ldst->datalo_reg;
231
+ mov[0].src =
232
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
233
+ mov[0].dst_type = TCG_TYPE_I32;
234
+ mov[0].src_type = TCG_TYPE_I32;
235
+ mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
236
+
237
+ mov[1].dst = ldst->datahi_reg;
238
+ mov[1].src =
239
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
240
+ mov[1].dst_type = TCG_TYPE_REG;
241
+ mov[1].src_type = TCG_TYPE_REG;
242
+ mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
243
+
244
+ tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
245
}
246
247
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
248
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
249
info = &info_helper_st64_mmu;
250
data_type = TCG_TYPE_I64;
251
break;
252
+ case MO_128:
253
+ info = &info_helper_st128_mmu;
254
+ data_type = TCG_TYPE_I128;
255
+ break;
256
default:
257
g_assert_not_reached();
258
}
259
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
260
261
/* Handle data argument. */
262
loc = &info->in[next_arg];
263
- n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
264
- ldst->datalo_reg, ldst->datahi_reg);
265
- next_arg += n;
266
- nmov += n;
267
- tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
268
+ switch (loc->kind) {
269
+ case TCG_CALL_ARG_NORMAL:
270
+ case TCG_CALL_ARG_EXTEND_U:
271
+ case TCG_CALL_ARG_EXTEND_S:
272
+ n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
273
+ ldst->datalo_reg, ldst->datahi_reg);
274
+ next_arg += n;
275
+ nmov += n;
276
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
277
+ break;
278
+
279
+ case TCG_CALL_ARG_BY_REF:
280
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
281
+ tcg_debug_assert(data_type == TCG_TYPE_I128);
282
+ tcg_out_st(s, TCG_TYPE_I64,
283
+ HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
284
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
285
+ tcg_out_st(s, TCG_TYPE_I64,
286
+ HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
287
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
288
+
289
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
290
+
291
+ if (arg_slot_reg_p(loc->arg_slot)) {
292
+ tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
293
+ TCG_REG_CALL_STACK,
294
+ arg_slot_stk_ofs(loc->ref_slot));
295
+ } else {
296
+ tcg_debug_assert(parm->ntmp != 0);
297
+ tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
298
+ arg_slot_stk_ofs(loc->ref_slot));
299
+ tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
300
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
301
+ }
302
+ next_arg += 2;
303
+ break;
304
+
305
+ default:
306
+ g_assert_not_reached();
307
+ }
308
309
- tcg_out_helper_load_slots(s, nmov, mov, parm);
310
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
311
}
312
313
--
314
2.34.1
New patch
1
Examine MemOp for atomicity and alignment, adjusting alignment
2
as required to implement atomicity on the host.
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/tcg.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
8
1 file changed, 95 insertions(+)
9
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/tcg.c
13
+++ b/tcg/tcg.c
14
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
15
#endif
16
};
17
18
+typedef struct {
19
+ MemOp atom; /* lg2 bits of atomicity required */
20
+ MemOp align; /* lg2 bits of alignment to use */
21
+} TCGAtomAlign;
22
+
23
+static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
24
+ MemOp host_atom, bool allow_two_ops)
25
+ __attribute__((unused));
26
+
27
TCGContext tcg_init_ctx;
28
__thread TCGContext *tcg_ctx;
29
30
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
31
}
32
}
33
34
+/**
35
+ * atom_and_align_for_opc:
36
+ * @s: tcg context
37
+ * @opc: memory operation code
38
+ * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
39
+ * @allow_two_ops: true if we are prepared to issue two operations
40
+ *
41
+ * Return the alignment and atomicity to use for the inline fast path
42
+ * for the given memory operation. The alignment may be larger than
43
+ * that specified in @opc, and the correct alignment will be diagnosed
44
+ * by the slow path helper.
45
+ *
46
+ * If @allow_two_ops, the host is prepared to test for 2x alignment,
47
+ * and issue two loads or stores for subalignment.
48
+ */
49
+static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
50
+ MemOp host_atom, bool allow_two_ops)
51
+{
52
+ MemOp align = get_alignment_bits(opc);
53
+ MemOp size = opc & MO_SIZE;
54
+ MemOp half = size ? size - 1 : 0;
55
+ MemOp atmax;
56
+ MemOp atom;
57
+
58
+ /* When serialized, no further atomicity required. */
59
+ if (s->gen_tb->cflags & CF_PARALLEL) {
60
+ atom = opc & MO_ATOM_MASK;
61
+ } else {
62
+ atom = MO_ATOM_NONE;
63
+ }
64
+
65
+ switch (atom) {
66
+ case MO_ATOM_NONE:
67
+ /* The operation requires no specific atomicity. */
68
+ atmax = MO_8;
69
+ break;
70
+
71
+ case MO_ATOM_IFALIGN:
72
+ atmax = size;
73
+ break;
74
+
75
+ case MO_ATOM_IFALIGN_PAIR:
76
+ atmax = half;
77
+ break;
78
+
79
+ case MO_ATOM_WITHIN16:
80
+ atmax = size;
81
+ if (size == MO_128) {
82
+ /* Misalignment implies !within16, and therefore no atomicity. */
83
+ } else if (host_atom != MO_ATOM_WITHIN16) {
84
+ /* The host does not implement within16, so require alignment. */
85
+ align = MAX(align, size);
86
+ }
87
+ break;
88
+
89
+ case MO_ATOM_WITHIN16_PAIR:
90
+ atmax = size;
91
+ /*
92
+ * Misalignment implies !within16, and therefore half atomicity.
93
+ * Any host prepared for two operations can implement this with
94
+ * half alignment.
95
+ */
96
+ if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
97
+ align = MAX(align, half);
98
+ }
99
+ break;
100
+
101
+ case MO_ATOM_SUBALIGN:
102
+ atmax = size;
103
+ if (host_atom != MO_ATOM_SUBALIGN) {
104
+ /* If unaligned but not odd, there are subobjects up to half. */
105
+ if (allow_two_ops) {
106
+ align = MAX(align, half);
107
+ } else {
108
+ align = MAX(align, size);
109
+ }
110
+ }
111
+ break;
112
+
113
+ default:
114
+ g_assert_not_reached();
115
+ }
116
+
117
+ return (TCGAtomAlign){ .atom = atmax, .align = align };
118
+}
119
+
120
/*
121
* Similarly for qemu_ld/st slow path helpers.
122
* We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
123
--
124
2.34.1
New patch
1
No change to the ultimate load/store routines yet, so some atomicity
2
conditions are not yet honored, but this plumbs the change to alignment through
3
the relevant functions.
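
One small detail worth calling out (illustration only, not part of the patch): the softmmu hunk below compares masks instead of alignment bit counts, i.e. a_bits >= s_bits becomes a_mask >= s_mask. Since x -> (1 << x) - 1 is monotonic, the two tests are equivalent; a throwaway check:

/* Throwaway check that mask comparison matches bit-count comparison. */
#include <assert.h>

int main(void)
{
    for (unsigned a_bits = 0; a_bits <= 4; a_bits++) {
        for (unsigned s_bits = 0; s_bits <= 4; s_bits++) {
            unsigned a_mask = (1u << a_bits) - 1;
            unsigned s_mask = (1u << s_bits) - 1;
            assert((a_mask >= s_mask) == (a_bits >= s_bits));
        }
    }
    return 0;
}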
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 27 +++++++++++++++------------
9
1 file changed, 15 insertions(+), 12 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
int index;
17
int ofs;
18
int seg;
19
+ TCGAtomAlign aa;
20
} HostAddress;
21
22
bool tcg_target_has_memory_bswap(MemOp memop)
23
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
24
{
25
TCGLabelQemuLdst *ldst = NULL;
26
MemOp opc = get_memop(oi);
27
- unsigned a_bits = get_alignment_bits(opc);
28
- unsigned a_mask = (1 << a_bits) - 1;
29
+ unsigned a_mask;
30
+
31
+#ifdef CONFIG_SOFTMMU
32
+ h->index = TCG_REG_L0;
33
+ h->ofs = 0;
34
+ h->seg = 0;
35
+#else
36
+ *h = x86_guest_base;
37
+#endif
38
+ h->base = addrlo;
39
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
40
+ a_mask = (1 << h->aa.align) - 1;
41
42
#ifdef CONFIG_SOFTMMU
43
int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
44
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
45
* copy the address and mask. For lesser alignments, check that we don't
46
* cross pages for the complete access.
47
*/
48
- if (a_bits >= s_bits) {
49
+ if (a_mask >= s_mask) {
50
tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
51
} else {
52
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
53
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
54
/* TLB Hit. */
55
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
56
offsetof(CPUTLBEntry, addend));
57
-
58
- *h = (HostAddress) {
59
- .base = addrlo,
60
- .index = TCG_REG_L0,
61
- };
62
#else
63
- if (a_bits) {
64
+ if (a_mask) {
65
ldst = new_ldst_label(s);
66
67
ldst->is_ld = is_ld;
68
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
69
ldst->label_ptr[0] = s->code_ptr;
70
s->code_ptr += 4;
71
}
72
-
73
- *h = x86_guest_base;
74
- h->base = addrlo;
75
#endif
76
77
return ldst;
78
--
79
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/aarch64/tcg-target.c.inc | 36 ++++++++++++++++++------------------
5
1 file changed, 18 insertions(+), 18 deletions(-)
1
6
7
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/aarch64/tcg-target.c.inc
10
+++ b/tcg/aarch64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ typedef struct {
12
TCGReg base;
13
TCGReg index;
14
TCGType index_ext;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
- unsigned a_mask = (1u << a_bits) - 1;
25
+ unsigned a_mask;
26
+
27
+ h->aa = atom_and_align_for_opc(s, opc,
28
+ have_lse2 ? MO_ATOM_WITHIN16
29
+ : MO_ATOM_IFALIGN,
30
+ false);
31
+ a_mask = (1 << h->aa.align) - 1;
32
33
#ifdef CONFIG_SOFTMMU
34
unsigned s_bits = opc & MO_SIZE;
35
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
36
* bits within the address. For unaligned access, we check that we don't
37
* cross pages using the address of the last byte of the access.
38
*/
39
- if (a_bits >= s_bits) {
40
+ if (a_mask >= s_mask) {
41
x3 = addr_reg;
42
} else {
43
tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
44
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
45
ldst->label_ptr[0] = s->code_ptr;
46
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
47
48
- *h = (HostAddress){
49
- .base = TCG_REG_X1,
50
- .index = addr_reg,
51
- .index_ext = addr_type
52
- };
53
+ h->base = TCG_REG_X1,
54
+ h->index = addr_reg;
55
+ h->index_ext = addr_type;
56
#else
57
if (a_mask) {
58
ldst = new_ldst_label(s);
59
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
60
}
61
62
if (USE_GUEST_BASE) {
63
- *h = (HostAddress){
64
- .base = TCG_REG_GUEST_BASE,
65
- .index = addr_reg,
66
- .index_ext = addr_type
67
- };
68
+ h->base = TCG_REG_GUEST_BASE;
69
+ h->index = addr_reg;
70
+ h->index_ext = addr_type;
71
} else {
72
- *h = (HostAddress){
73
- .base = addr_reg,
74
- .index = TCG_REG_XZR,
75
- .index_ext = TCG_TYPE_I64
76
- };
77
+ h->base = addr_reg;
78
+ h->index = TCG_REG_XZR;
79
+ h->index_ext = TCG_TYPE_I64;
80
}
81
#endif
82
83
--
84
2.34.1
New patch
1
No change to the ultimate load/store routines yet, so some atomicity
2
conditions are not yet honored, but this plumbs the change to alignment through
3
the relevant functions.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/arm/tcg-target.c.inc | 39 ++++++++++++++++++++++-----------------
9
1 file changed, 22 insertions(+), 17 deletions(-)
10
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/arm/tcg-target.c.inc
14
+++ b/tcg/arm/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
TCGReg base;
17
int index;
18
bool index_scratch;
19
+ TCGAtomAlign aa;
20
} HostAddress;
21
22
bool tcg_target_has_memory_bswap(MemOp memop)
23
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
24
{
25
TCGLabelQemuLdst *ldst = NULL;
26
MemOp opc = get_memop(oi);
27
- MemOp a_bits = get_alignment_bits(opc);
28
- unsigned a_mask = (1 << a_bits) - 1;
29
+ unsigned a_mask;
30
+
31
+#ifdef CONFIG_SOFTMMU
32
+ *h = (HostAddress){
33
+ .cond = COND_AL,
34
+ .base = addrlo,
35
+ .index = TCG_REG_R1,
36
+ .index_scratch = true,
37
+ };
38
+#else
39
+ *h = (HostAddress){
40
+ .cond = COND_AL,
41
+ .base = addrlo,
42
+ .index = guest_base ? TCG_REG_GUEST_BASE : -1,
43
+ .index_scratch = false,
44
+ };
45
+#endif
46
+
47
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
48
+ a_mask = (1 << h->aa.align) - 1;
49
50
#ifdef CONFIG_SOFTMMU
51
int mem_index = get_mmuidx(oi);
52
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
53
if (TARGET_LONG_BITS == 64) {
54
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
55
}
56
-
57
- *h = (HostAddress){
58
- .cond = COND_AL,
59
- .base = addrlo,
60
- .index = TCG_REG_R1,
61
- .index_scratch = true,
62
- };
63
#else
64
if (a_mask) {
65
ldst = new_ldst_label(s);
66
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
67
ldst->addrlo_reg = addrlo;
68
ldst->addrhi_reg = addrhi;
69
70
- /* We are expecting a_bits to max out at 7 */
71
+ /* We are expecting alignment to max out at 7 */
72
tcg_debug_assert(a_mask <= 0xff);
73
/* tst addr, #mask */
74
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
75
}
76
-
77
- *h = (HostAddress){
78
- .cond = COND_AL,
79
- .base = addrlo,
80
- .index = guest_base ? TCG_REG_GUEST_BASE : -1,
81
- .index_scratch = false,
82
- };
83
#endif
84
85
return ldst;
86
--
87
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/loongarch64/tcg-target.c.inc | 6 +++++-
5
1 file changed, 5 insertions(+), 1 deletion(-)
1
6
7
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/loongarch64/tcg-target.c.inc
10
+++ b/tcg/loongarch64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
12
typedef struct {
13
TCGReg base;
14
TCGReg index;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
+ MemOp a_bits;
25
+
26
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
27
+ a_bits = h->aa.align;
28
29
#ifdef CONFIG_SOFTMMU
30
unsigned s_bits = opc & MO_SIZE;
31
--
32
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/mips/tcg-target.c.inc | 15 +++++++++------
5
1 file changed, 9 insertions(+), 6 deletions(-)
1
6
7
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/mips/tcg-target.c.inc
10
+++ b/tcg/mips/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
12
13
typedef struct {
14
TCGReg base;
15
- MemOp align;
16
+ TCGAtomAlign aa;
17
} HostAddress;
18
19
bool tcg_target_has_memory_bswap(MemOp memop)
20
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
21
{
22
TCGLabelQemuLdst *ldst = NULL;
23
MemOp opc = get_memop(oi);
24
- unsigned a_bits = get_alignment_bits(opc);
25
+ MemOp a_bits;
26
unsigned s_bits = opc & MO_SIZE;
27
- unsigned a_mask = (1 << a_bits) - 1;
28
+ unsigned a_mask;
29
TCGReg base;
30
31
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
32
+ a_bits = h->aa.align;
33
+ a_mask = (1 << a_bits) - 1;
34
+
35
#ifdef CONFIG_SOFTMMU
36
unsigned s_mask = (1 << s_bits) - 1;
37
int mem_index = get_mmuidx(oi);
38
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
39
#endif
40
41
h->base = base;
42
- h->align = a_bits;
43
return ldst;
44
}
45
46
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
47
48
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
49
50
- if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
51
+ if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) {
52
tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type);
53
} else {
54
tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type);
55
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
56
57
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
58
59
- if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
60
+ if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) {
61
tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc);
62
} else {
63
tcg_out_qemu_st_unalign(s, datalo, datahi, h.base, opc);
64
--
65
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/ppc/tcg-target.c.inc | 19 ++++++++++++++++++-
5
1 file changed, 18 insertions(+), 1 deletion(-)
1
6
7
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/ppc/tcg-target.c.inc
10
+++ b/tcg/ppc/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
12
typedef struct {
13
TCGReg base;
14
TCGReg index;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
+ MemOp a_bits;
25
+
26
+ /*
27
+ * Book II, Section 1.4, Single-Copy Atomicity, specifies:
28
+ *
29
+ * Before 3.0, "An access that is not atomic is performed as a set of
30
+ * smaller disjoint atomic accesses. In general, the number and alignment
31
+ * of these accesses are implementation-dependent." Thus MO_ATOM_IFALIGN.
32
+ *
33
+ * As of 3.0, "the non-atomic access is performed as described in
34
+ * the corresponding list", which matches MO_ATOM_SUBALIGN.
35
+ */
36
+ h->aa = atom_and_align_for_opc(s, opc,
37
+ have_isa_3_00 ? MO_ATOM_SUBALIGN
38
+ : MO_ATOM_IFALIGN,
39
+ false);
40
+ a_bits = h->aa.align;
41
42
#ifdef CONFIG_SOFTMMU
43
int mem_index = get_mmuidx(oi);
44
--
45
2.34.1
diff view generated by jsdifflib
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/riscv/tcg-target.c.inc | 13 ++++++++-----
5
1 file changed, 8 insertions(+), 5 deletions(-)
1
6
7
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/riscv/tcg-target.c.inc
10
+++ b/tcg/riscv/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
12
{
13
TCGLabelQemuLdst *ldst = NULL;
14
MemOp opc = get_memop(oi);
15
- unsigned a_bits = get_alignment_bits(opc);
16
- unsigned a_mask = (1u << a_bits) - 1;
17
+ TCGAtomAlign aa;
18
+ unsigned a_mask;
19
+
20
+ aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
21
+ a_mask = (1u << aa.align) - 1;
22
23
#ifdef CONFIG_SOFTMMU
24
unsigned s_bits = opc & MO_SIZE;
25
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
26
* cross pages using the address of the last byte of the access.
27
*/
28
addr_adj = addr_reg;
29
- if (a_bits < s_bits) {
30
+ if (a_mask < s_mask) {
31
addr_adj = TCG_REG_TMP0;
32
tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
33
addr_adj, addr_reg, s_mask - a_mask);
34
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
35
ldst->oi = oi;
36
ldst->addrlo_reg = addr_reg;
37
38
- /* We are expecting a_bits max 7, so we can always use andi. */
39
- tcg_debug_assert(a_bits < 12);
40
+ /* We are expecting alignment max 7, so we can always use andi. */
41
+ tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
42
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
43
44
ldst->label_ptr[0] = s->code_ptr;
45
--
46
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/s390x/tcg-target.c.inc | 11 +++++++----
5
1 file changed, 7 insertions(+), 4 deletions(-)
1
6
7
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/s390x/tcg-target.c.inc
10
+++ b/tcg/s390x/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ typedef struct {
12
TCGReg base;
13
TCGReg index;
14
int disp;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
- unsigned a_mask = (1u << a_bits) - 1;
25
+ unsigned a_mask;
26
+
27
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
28
+ a_mask = (1 << h->aa.align) - 1;
29
30
#ifdef CONFIG_SOFTMMU
31
unsigned s_bits = opc & MO_SIZE;
32
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
33
* bits within the address. For unaligned access, we check that we don't
34
* cross pages using the address of the last byte of the access.
35
*/
36
- a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
37
+ a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
38
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
39
if (a_off == 0) {
40
tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
41
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
42
ldst->addrlo_reg = addr_reg;
43
44
/* We are expecting a_bits to max out at 7, much lower than TMLL. */
45
- tcg_debug_assert(a_bits < 16);
46
+ tcg_debug_assert(a_mask <= 0xffff);
47
tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
48
49
tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
50
--
51
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/sparc64/tcg-target.c.inc | 21 ++++++++++++---------
5
1 file changed, 12 insertions(+), 9 deletions(-)
1
6
7
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/sparc64/tcg-target.c.inc
10
+++ b/tcg/sparc64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
12
typedef struct {
13
TCGReg base;
14
TCGReg index;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
- unsigned s_bits = opc & MO_SIZE;
25
+ MemOp s_bits = opc & MO_SIZE;
26
unsigned a_mask;
27
28
/* We don't support unaligned accesses. */
29
- a_bits = MAX(a_bits, s_bits);
30
- a_mask = (1u << a_bits) - 1;
31
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
32
+ h->aa.align = MAX(h->aa.align, s_bits);
33
+ a_mask = (1u << h->aa.align) - 1;
34
35
#ifdef CONFIG_SOFTMMU
36
int mem_index = get_mmuidx(oi);
37
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
38
cc = TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC;
39
tcg_out_bpcc0(s, COND_NE, BPCC_PN | cc, 0);
40
#else
41
- if (a_bits != s_bits) {
42
- /*
43
- * Test for at least natural alignment, and defer
44
- * everything else to the helper functions.
45
- */
46
+ /*
47
+ * If the size equals the required alignment, we can skip the test
48
+ * and allow host SIGBUS to deliver SIGBUS to the guest.
49
+ * Otherwise, test for at least natural alignment and defer
50
+ * everything else to the helper functions.
51
+ */
52
+ if (s_bits != get_alignment_bits(opc)) {
53
tcg_debug_assert(check_fit_tl(a_mask, 13));
54
tcg_out_arithi(s, TCG_REG_G0, addr_reg, a_mask, ARITH_ANDCC);
55
56
--
57
2.34.1
New patch
1
Use the fpu to perform 64-bit loads and stores.
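
For reference, a minimal standalone sketch of the idea (assumes a 32-bit x86 host with an x87 FPU; the helper name is invented for the example): fild/fistp move eight bytes through the FPU as one access, and any 64-bit integer round-trips exactly through the 80-bit x87 format. The patch does the same thing with guest memory on one side and a host stack slot on the other.

#include <stdint.h>

/* Copy 8 bytes with a single load and a single store via the x87 FPU. */
static inline void copy_u64_via_x87(const uint64_t *src, uint64_t *dst)
{
    __asm__ volatile("fildll %1\n\t"   /* load the 64-bit integer into st(0) */
                     "fistpll %0"      /* store it back out and pop */
                     : "=m" (*dst)
                     : "m" (*src));
}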
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/i386/tcg-target.c.inc | 44 +++++++++++++++++++++++++++++++++------
7
1 file changed, 38 insertions(+), 6 deletions(-)
8
9
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/i386/tcg-target.c.inc
12
+++ b/tcg/i386/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
14
#define OPC_GRP5 (0xff)
15
#define OPC_GRP14 (0x73 | P_EXT | P_DATA16)
16
17
+#define OPC_ESCDF (0xdf)
18
+#define ESCDF_FILD_m64 5
19
+#define ESCDF_FISTP_m64 7
20
+
21
/* Group 1 opcode extensions for 0x80-0x83.
22
These are also used as modifiers for OPC_ARITH. */
23
#define ARITH_ADD 0
24
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
25
datalo = datahi;
26
datahi = t;
27
}
28
- if (h.base == datalo || h.index == datalo) {
29
+ if (h.aa.atom == MO_64) {
30
+ /*
31
+ * Atomicity requires that we use a single 8-byte load.
32
+ * For simplicity and code size, always use the FPU for this.
33
+ * Similar insns using SSE/AVX are merely larger.
34
+ * Load from memory in one go, then store back to the stack,
35
+ * from whence we can load into the correct integer regs.
36
+ */
37
+ tcg_out_modrm_sib_offset(s, OPC_ESCDF + h.seg, ESCDF_FILD_m64,
38
+ h.base, h.index, 0, h.ofs);
39
+ tcg_out_modrm_offset(s, OPC_ESCDF, ESCDF_FISTP_m64, TCG_REG_ESP, 0);
40
+ tcg_out_modrm_offset(s, movop, datalo, TCG_REG_ESP, 0);
41
+ tcg_out_modrm_offset(s, movop, datahi, TCG_REG_ESP, 4);
42
+ } else if (h.base == datalo || h.index == datalo) {
43
tcg_out_modrm_sib_offset(s, OPC_LEA, datahi,
44
h.base, h.index, 0, h.ofs);
45
tcg_out_modrm_offset(s, movop + h.seg, datalo, datahi, 0);
46
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
47
if (TCG_TARGET_REG_BITS == 64) {
48
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
49
h.base, h.index, 0, h.ofs);
50
+ break;
51
+ }
52
+ if (use_movbe) {
53
+ TCGReg t = datalo;
54
+ datalo = datahi;
55
+ datahi = t;
56
+ }
57
+ if (h.aa.atom == MO_64) {
58
+ /*
59
+ * Atomicity requires that we use one 8-byte store.
60
+ * For simplicity, and code size, always use the FPU for this.
61
+ * Similar insns using SSE/AVX are merely larger.
62
+ * Assemble the 8-byte quantity in required endianness
63
+ * on the stack, load to coproc unit, and store.
64
+ */
65
+ tcg_out_modrm_offset(s, movop, datalo, TCG_REG_ESP, 0);
66
+ tcg_out_modrm_offset(s, movop, datahi, TCG_REG_ESP, 4);
67
+ tcg_out_modrm_offset(s, OPC_ESCDF, ESCDF_FILD_m64, TCG_REG_ESP, 0);
68
+ tcg_out_modrm_sib_offset(s, OPC_ESCDF + h.seg, ESCDF_FISTP_m64,
69
+ h.base, h.index, 0, h.ofs);
70
} else {
71
- if (use_movbe) {
72
- TCGReg t = datalo;
73
- datalo = datahi;
74
- datahi = t;
75
- }
76
tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
77
h.base, h.index, 0, h.ofs);
78
tcg_out_modrm_sib_offset(s, movop + h.seg, datahi,
79
--
80
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/i386/tcg-target.h | 3 +-
5
tcg/i386/tcg-target.c.inc | 181 +++++++++++++++++++++++++++++++++++++-
6
2 files changed, 180 insertions(+), 4 deletions(-)
1
7
8
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/i386/tcg-target.h
11
+++ b/tcg/i386/tcg-target.h
12
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
13
#define TCG_TARGET_HAS_qemu_st8_i32 1
14
#endif
15
16
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
17
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
18
+ (TCG_TARGET_REG_BITS == 64 && have_atomic16)
19
20
/* We do not support older SSE systems, only beginning with AVX1. */
21
#define TCG_TARGET_HAS_v64 have_avx1
22
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
23
index XXXXXXX..XXXXXXX 100644
24
--- a/tcg/i386/tcg-target.c.inc
25
+++ b/tcg/i386/tcg-target.c.inc
26
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
27
#endif
28
};
29
30
+#define TCG_TMP_VEC TCG_REG_XMM5
31
+
32
static const int tcg_target_call_iarg_regs[] = {
33
#if TCG_TARGET_REG_BITS == 64
34
#if defined(_WIN64)
35
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
36
#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16)
37
#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16)
38
#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16)
39
+#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16)
40
+#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16)
41
#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
42
#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
43
#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)
44
@@ -XXX,XX +XXX,XX @@ typedef struct {
45
46
bool tcg_target_has_memory_bswap(MemOp memop)
47
{
48
- return have_movbe;
49
+ TCGAtomAlign aa;
50
+
51
+ if (!have_movbe) {
52
+ return false;
53
+ }
54
+ if ((memop & MO_SIZE) <= MO_64) {
55
+ return true;
56
+ }
57
+
58
+ /*
59
+ * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA,
60
+ * but do allow a pair of 64-bit operations, i.e. MOVBEQ.
61
+ */
62
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
63
+ return aa.atom <= MO_64;
64
}
65
66
/*
67
@@ -XXX,XX +XXX,XX @@ static const TCGLdstHelperParam ldst_helper_param = {
68
static const TCGLdstHelperParam ldst_helper_param = { };
69
#endif
70
71
+static void tcg_out_vec_to_pair(TCGContext *s, TCGType type,
72
+ TCGReg l, TCGReg h, TCGReg v)
73
+{
74
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
75
+
76
+ /* vpmov{d,q} %v, %l */
77
+ tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l);
78
+ /* vpextr{d,q} $1, %v, %h */
79
+ tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h);
80
+ tcg_out8(s, 1);
81
+}
82
+
83
+static void tcg_out_pair_to_vec(TCGContext *s, TCGType type,
84
+ TCGReg v, TCGReg l, TCGReg h)
85
+{
86
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
87
+
88
+ /* vmov{d,q} %l, %v */
89
+ tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l);
90
+ /* vpinsr{d,q} $1, %h, %v, %v */
91
+ tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h);
92
+ tcg_out8(s, 1);
93
+}
94
+
95
/*
96
* Generate code for the slow path for a load at the end of block
97
*/
98
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
99
{
100
TCGLabelQemuLdst *ldst = NULL;
101
MemOp opc = get_memop(oi);
102
+ MemOp s_bits = opc & MO_SIZE;
103
unsigned a_mask;
104
105
#ifdef CONFIG_SOFTMMU
106
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
107
*h = x86_guest_base;
108
#endif
109
h->base = addrlo;
110
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
111
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
112
a_mask = (1 << h->aa.align) - 1;
113
114
#ifdef CONFIG_SOFTMMU
115
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
116
TCGType tlbtype = TCG_TYPE_I32;
117
int trexw = 0, hrexw = 0, tlbrexw = 0;
118
unsigned mem_index = get_mmuidx(oi);
119
- unsigned s_bits = opc & MO_SIZE;
120
unsigned s_mask = (1 << s_bits) - 1;
121
target_ulong tlb_mask;
122
123
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
124
h.base, h.index, 0, h.ofs + 4);
125
}
126
break;
127
+
128
+ case MO_128:
129
+ {
130
+ TCGLabel *l1 = NULL, *l2 = NULL;
131
+ bool use_pair = h.aa.atom < MO_128;
132
+
133
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
134
+
135
+ if (!use_pair) {
136
+ tcg_debug_assert(!use_movbe);
137
+ /*
138
+ * Atomicity requires that we use use VMOVDQA.
139
+ * If we've already checked for 16-byte alignment, that's all
140
+ * we need. If we arrive here with lesser alignment, then we
141
+ * have determined that less than 16-byte alignment can be
142
+ * satisfied with two 8-byte loads.
143
+ */
144
+ if (h.aa.align < MO_128) {
145
+ use_pair = true;
146
+ l1 = gen_new_label();
147
+ l2 = gen_new_label();
148
+
149
+ tcg_out_testi(s, h.base, 15);
150
+ tcg_out_jxx(s, JCC_JNE, l2, true);
151
+ }
152
+
153
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
154
+ TCG_TMP_VEC, 0,
155
+ h.base, h.index, 0, h.ofs);
156
+ tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo,
157
+ datahi, TCG_TMP_VEC);
158
+
159
+ if (use_pair) {
160
+ tcg_out_jxx(s, JCC_JMP, l1, true);
161
+ tcg_out_label(s, l2);
162
+ }
163
+ }
164
+ if (use_pair) {
165
+ if (use_movbe) {
166
+ TCGReg t = datalo;
167
+ datalo = datahi;
168
+ datahi = t;
169
+ }
170
+ if (h.base == datalo || h.index == datalo) {
171
+ tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi,
172
+ h.base, h.index, 0, h.ofs);
173
+ tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
174
+ datalo, datahi, 0);
175
+ tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
176
+ datahi, datahi, 8);
177
+ } else {
178
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
179
+ h.base, h.index, 0, h.ofs);
180
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
181
+ h.base, h.index, 0, h.ofs + 8);
182
+ }
183
+ }
184
+ if (l1) {
185
+ tcg_out_label(s, l1);
186
+ }
187
+ }
188
+ break;
189
+
190
default:
191
g_assert_not_reached();
192
}
193
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
194
h.base, h.index, 0, h.ofs + 4);
195
}
196
break;
197
+
198
+ case MO_128:
199
+ {
200
+ TCGLabel *l1 = NULL, *l2 = NULL;
201
+ bool use_pair = h.aa.atom < MO_128;
202
+
203
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
204
+
205
+ if (!use_pair) {
206
+ tcg_debug_assert(!use_movbe);
207
+ /*
208
+ * Atomicity requires that we use use VMOVDQA.
209
+ * If we've already checked for 16-byte alignment, that's all
210
+ * we need. If we arrive here with lesser alignment, then we
211
+ * have determined that less that 16-byte alignment can be
212
+ * satisfied with two 8-byte loads.
213
+ */
214
+ if (h.aa.align < MO_128) {
215
+ use_pair = true;
216
+ l1 = gen_new_label();
217
+ l2 = gen_new_label();
218
+
219
+ tcg_out_testi(s, h.base, 15);
220
+ tcg_out_jxx(s, JCC_JNE, l2, true);
221
+ }
222
+
223
+ tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC,
224
+ datalo, datahi);
225
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
226
+ TCG_TMP_VEC, 0,
227
+ h.base, h.index, 0, h.ofs);
228
+
229
+ if (use_pair) {
230
+ tcg_out_jxx(s, JCC_JMP, l1, true);
231
+ tcg_out_label(s, l2);
232
+ }
233
+ }
234
+ if (use_pair) {
235
+ if (use_movbe) {
236
+ TCGReg t = datalo;
237
+ datalo = datahi;
238
+ datahi = t;
239
+ }
240
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
241
+ h.base, h.index, 0, h.ofs);
242
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
243
+ h.base, h.index, 0, h.ofs + 8);
244
+ }
245
+ if (l1) {
246
+ tcg_out_label(s, l1);
247
+ }
248
+ }
249
+ break;
250
+
251
default:
252
g_assert_not_reached();
253
}
254
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
255
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
256
}
257
break;
258
+ case INDEX_op_qemu_ld_i128:
259
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
260
+ tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
261
+ break;
262
case INDEX_op_qemu_st_i32:
263
case INDEX_op_qemu_st8_i32:
264
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
265
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
266
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
267
}
268
break;
269
+ case INDEX_op_qemu_st_i128:
270
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
271
+ tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
272
+ break;
273
274
OP_32_64(mulu2):
275
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
276
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
277
: TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(L, L, L)
278
: C_O0_I4(L, L, L, L));
279
280
+ case INDEX_op_qemu_ld_i128:
281
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
282
+ return C_O2_I1(r, r, L);
283
+ case INDEX_op_qemu_st_i128:
284
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
285
+ return C_O0_I3(L, L, L);
286
+
287
case INDEX_op_brcond2_i32:
288
return C_O0_I4(r, r, ri, ri);
289
290
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
291
292
s->reserved_regs = 0;
293
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
294
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC);
295
#ifdef _WIN64
296
/* These are call saved, and we don't save them, so don't use them. */
297
tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6);
298
--
299
2.34.1
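
For illustration, the MO_128 load path above reduces to a run-time alignment
test: a single VMOVDQA on the aligned path, a pair of 8-byte loads otherwise.
A minimal user-space C sketch of that dispatch follows, assuming a host where
an aligned 16-byte SSE load is atomic; the struct and function names are
illustrative only, not QEMU API.

    #include <emmintrin.h>   /* SSE2 intrinsics */
    #include <stdint.h>
    #include <string.h>

    typedef struct { uint64_t lo, hi; } Pair64;

    /* Sketch of the VMOVDQA-or-pair sequence emitted by
     * tcg_out_qemu_ld_direct above (illustration only). */
    static inline Pair64 load16(const void *p)
    {
        Pair64 r;
        if (((uintptr_t)p & 15) == 0) {
            /* Aligned: one 16-byte vector load. */
            __m128i v = _mm_load_si128((const __m128i *)p);
            _mm_storeu_si128((__m128i *)&r, v);
        } else {
            /* Unaligned: the caller has accepted two 8-byte loads. */
            memcpy(&r.lo, p, 8);
            memcpy(&r.hi, (const char *)p + 8, 8);
        }
        return r;
    }
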
1
In target/arm we will shortly have "too many" mmu_idx.
1
We will need to allocate a second general-purpose temporary.
2
The current minimum barrier is caused by the way in which
2
Rename the existing temps to add a distinguishing number.
3
tlb_flush_page_by_mmuidx is coded.
4
5
We can remove this limitation by allocating memory for
6
consumption by the worker. Let us assume that this is
7
the unlikely case, as will be the case for the majority
8
of targets which have so far satisfied the BUILD_BUG_ON,
9
and only allocate memory when necessary.
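
The trick is plain bit-packing: when idxmap fits below TARGET_PAGE_BITS it
rides in the page offset of the pointer-sized argument passed to
async_run_on_cpu, and only the overflow case pays for an allocation. A minimal
self-contained sketch of the encode/decode step, with stand-in constants in
place of the QEMU macros (illustration only):

    #include <assert.h>
    #include <stdint.h>

    /* Stand-ins for TARGET_PAGE_BITS/MASK; 4 KiB pages for the example. */
    #define PAGE_BITS 12
    #define PAGE_SIZE (1u << PAGE_BITS)
    #define PAGE_MASK (~(uint64_t)(PAGE_SIZE - 1))

    int main(void)
    {
        uint64_t addr = 0x7f12345000ull;  /* already page aligned */
        uint16_t idxmap = 0x0005;         /* flush mmu_idx 0 and 2 */

        assert(idxmap < PAGE_SIZE);       /* else allocate a structure */
        uint64_t packed = (addr & PAGE_MASK) | idxmap;

        /* Worker side, as in tlb_flush_page_by_mmuidx_async_1 below: */
        uint64_t addr_out = packed & PAGE_MASK;
        uint16_t idx_out = (uint16_t)(packed & ~PAGE_MASK);

        assert(addr_out == addr && idx_out == idxmap);
        return 0;
    }
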
10
3
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
6
---
14
accel/tcg/cputlb.c | 167 +++++++++++++++++++++++++++++++++++----------
7
tcg/aarch64/tcg-target.c.inc | 50 ++++++++++++++++++------------------
15
1 file changed, 132 insertions(+), 35 deletions(-)
8
1 file changed, 25 insertions(+), 25 deletions(-)
16
9
17
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
18
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
19
--- a/accel/tcg/cputlb.c
12
--- a/tcg/aarch64/tcg-target.c.inc
20
+++ b/accel/tcg/cputlb.c
13
+++ b/tcg/aarch64/tcg-target.c.inc
21
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
14
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
15
bool have_lse;
16
bool have_lse2;
17
18
-#define TCG_REG_TMP TCG_REG_X30
19
-#define TCG_VEC_TMP TCG_REG_V31
20
+#define TCG_REG_TMP0 TCG_REG_X30
21
+#define TCG_VEC_TMP0 TCG_REG_V31
22
23
#ifndef CONFIG_SOFTMMU
24
/* Note that XZR cannot be encoded in the address base register slot,
25
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
26
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
27
TCGReg r, TCGReg base, intptr_t offset)
28
{
29
- TCGReg temp = TCG_REG_TMP;
30
+ TCGReg temp = TCG_REG_TMP0;
31
32
if (offset < -0xffffff || offset > 0xffffff) {
33
tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
35
}
36
37
/* Worst-case scenario, move offset to temp register, use reg offset. */
38
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
39
- tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
40
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
41
+ tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
42
}
43
44
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
45
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
46
if (offset == sextract64(offset, 0, 26)) {
47
tcg_out_insn(s, 3206, BL, offset);
48
} else {
49
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
50
- tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
51
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
52
+ tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
22
}
53
}
23
}
54
}
24
55
25
-/* As we are going to hijack the bottom bits of the page address for a
56
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
26
- * mmuidx bit mask we need to fail to build if we can't do that
57
AArch64Insn insn;
27
+/**
58
28
+ * tlb_flush_page_by_mmuidx_async_0:
59
if (rl == ah || (!const_bh && rl == bh)) {
29
+ * @cpu: cpu on which to flush
60
- rl = TCG_REG_TMP;
30
+ * @addr: page of virtual address to flush
61
+ rl = TCG_REG_TMP0;
31
+ * @idxmap: set of mmu_idx to flush
62
}
32
+ *
63
33
+ * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
64
if (const_bl) {
34
+ * at @addr from the tlbs indicated by @idxmap from @cpu.
65
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
35
*/
66
possibility of adding 0+const in the low part, and the
36
-QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
67
immediate add instructions encode XSP not XZR. Don't try
37
-
68
anything more elaborate here than loading another zero. */
38
-static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
69
- al = TCG_REG_TMP;
39
- run_on_cpu_data data)
70
+ al = TCG_REG_TMP0;
40
+static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
71
tcg_out_movi(s, ext, al, 0);
41
+ target_ulong addr,
72
}
42
+ uint16_t idxmap)
73
tcg_out_insn_3401(s, insn, ext, rl, al, bl);
74
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
43
{
75
{
44
CPUArchState *env = cpu->env_ptr;
76
TCGReg a1 = a0;
45
- target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
77
if (is_ctz) {
46
- target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
78
- a1 = TCG_REG_TMP;
47
- unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
79
+ a1 = TCG_REG_TMP0;
48
int mmu_idx;
80
tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
49
81
}
50
assert_cpu_is_self(cpu);
82
if (const_b && b == (ext ? 64 : 32)) {
51
83
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
52
- tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
84
AArch64Insn sel = I3506_CSEL;
53
- addr, mmu_idx_bitmap);
85
54
+ tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
86
tcg_out_cmp(s, ext, a0, 0, 1);
55
87
- tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
56
qemu_spin_lock(&env_tlb(env)->c.lock);
88
+ tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
57
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
89
58
- if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
90
if (const_b) {
59
+ if ((idxmap >> mmu_idx) & 1) {
91
if (b == -1) {
60
tlb_flush_page_locked(env, mmu_idx, addr);
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
93
b = d;
94
}
61
}
95
}
62
}
96
- tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
63
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
97
+ tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
64
tb_flush_jmp_cache(cpu, addr);
65
}
66
67
+/**
68
+ * tlb_flush_page_by_mmuidx_async_1:
69
+ * @cpu: cpu on which to flush
70
+ * @data: encoded addr + idxmap
71
+ *
72
+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
73
+ * async_run_on_cpu. The idxmap parameter is encoded in the page
74
+ * offset of the target_ptr field. This limits the set of mmu_idx
75
+ * that can be passed via this method.
76
+ */
77
+static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
78
+ run_on_cpu_data data)
79
+{
80
+ target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
81
+ target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
82
+ uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
83
+
84
+ tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
85
+}
86
+
87
+typedef struct {
88
+ target_ulong addr;
89
+ uint16_t idxmap;
90
+} TLBFlushPageByMMUIdxData;
91
+
92
+/**
93
+ * tlb_flush_page_by_mmuidx_async_2:
94
+ * @cpu: cpu on which to flush
95
+ * @data: allocated addr + idxmap
96
+ *
97
+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
98
+ * async_run_on_cpu. The addr+idxmap parameters are stored in a
99
+ * TLBFlushPageByMMUIdxData structure that has been allocated
100
+ * specifically for this helper. Free the structure when done.
101
+ */
102
+static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
103
+ run_on_cpu_data data)
104
+{
105
+ TLBFlushPageByMMUIdxData *d = data.host_ptr;
106
+
107
+ tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
108
+ g_free(d);
109
+}
110
+
111
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
112
{
113
- target_ulong addr_and_mmu_idx;
114
-
115
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
116
117
/* This should already be page aligned */
118
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
119
- addr_and_mmu_idx |= idxmap;
120
+ addr &= TARGET_PAGE_MASK;
121
122
- if (!qemu_cpu_is_self(cpu)) {
123
- async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
124
- RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
125
+ if (qemu_cpu_is_self(cpu)) {
126
+ tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
127
+ } else if (idxmap < TARGET_PAGE_SIZE) {
128
+ /*
129
+ * Most targets have only a few mmu_idx. In the case where
130
+ * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
131
+ * allocating memory for this operation.
132
+ */
133
+ async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
134
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
135
} else {
136
- tlb_flush_page_by_mmuidx_async_work(
137
- cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
138
+ TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
139
+
140
+ /* Otherwise allocate a structure, freed by the worker. */
141
+ d->addr = addr;
142
+ d->idxmap = idxmap;
143
+ async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
144
+ RUN_ON_CPU_HOST_PTR(d));
145
}
98
}
146
}
99
}
147
100
148
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
101
@@ -XXX,XX +XXX,XX @@ bool tcg_target_has_memory_bswap(MemOp memop)
149
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
150
uint16_t idxmap)
151
{
152
- const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
153
- target_ulong addr_and_mmu_idx;
154
-
155
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
156
157
/* This should already be page aligned */
158
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
159
- addr_and_mmu_idx |= idxmap;
160
+ addr &= TARGET_PAGE_MASK;
161
162
- flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
163
- fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
164
+ /*
165
+ * Allocate memory to hold addr+idxmap only when needed.
166
+ * See tlb_flush_page_by_mmuidx for details.
167
+ */
168
+ if (idxmap < TARGET_PAGE_SIZE) {
169
+ flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
170
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
171
+ } else {
172
+ CPUState *dst_cpu;
173
+
174
+ /* Allocate a separate data block for each destination cpu. */
175
+ CPU_FOREACH(dst_cpu) {
176
+ if (dst_cpu != src_cpu) {
177
+ TLBFlushPageByMMUIdxData *d
178
+ = g_new(TLBFlushPageByMMUIdxData, 1);
179
+
180
+ d->addr = addr;
181
+ d->idxmap = idxmap;
182
+ async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
183
+ RUN_ON_CPU_HOST_PTR(d));
184
+ }
185
+ }
186
+ }
187
+
188
+ tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
189
}
102
}
190
103
191
void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
104
static const TCGLdstHelperParam ldst_helper_param = {
192
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
105
- .ntmp = 1, .tmp = { TCG_REG_TMP }
193
target_ulong addr,
106
+ .ntmp = 1, .tmp = { TCG_REG_TMP0 }
194
uint16_t idxmap)
107
};
195
{
108
196
- const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
109
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
197
- target_ulong addr_and_mmu_idx;
110
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
198
-
111
199
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
112
set_jmp_insn_offset(s, which);
200
113
tcg_out32(s, I3206_B);
201
/* This should already be page aligned */
114
- tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
202
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
115
+ tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
203
- addr_and_mmu_idx |= idxmap;
116
set_jmp_reset_offset(s, which);
204
+ addr &= TARGET_PAGE_MASK;
205
206
- flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
207
- async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
208
+ /*
209
+ * Allocate memory to hold addr+idxmap only when needed.
210
+ * See tlb_flush_page_by_mmuidx for details.
211
+ */
212
+ if (idxmap < TARGET_PAGE_SIZE) {
213
+ flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
214
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
215
+ async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
216
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
217
+ } else {
218
+ CPUState *dst_cpu;
219
+ TLBFlushPageByMMUIdxData *d;
220
+
221
+ /* Allocate a separate data block for each destination cpu. */
222
+ CPU_FOREACH(dst_cpu) {
223
+ if (dst_cpu != src_cpu) {
224
+ d = g_new(TLBFlushPageByMMUIdxData, 1);
225
+ d->addr = addr;
226
+ d->idxmap = idxmap;
227
+ async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
228
+ RUN_ON_CPU_HOST_PTR(d));
229
+ }
230
+ }
231
+
232
+ d = g_new(TLBFlushPageByMMUIdxData, 1);
233
+ d->addr = addr;
234
+ d->idxmap = idxmap;
235
+ async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
236
+ RUN_ON_CPU_HOST_PTR(d));
237
+ }
238
}
117
}
239
118
240
void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
119
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
120
ptrdiff_t i_offset = i_addr - jmp_rx;
121
122
/* Note that we asserted this in range in tcg_out_goto_tb. */
123
- insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
124
+ insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
125
}
126
qatomic_set((uint32_t *)jmp_rw, insn);
127
flush_idcache_range(jmp_rx, jmp_rw, 4);
128
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
129
130
case INDEX_op_rem_i64:
131
case INDEX_op_rem_i32:
132
- tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
133
- tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
134
+ tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
135
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
136
break;
137
case INDEX_op_remu_i64:
138
case INDEX_op_remu_i32:
139
- tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
140
- tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
141
+ tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
142
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
143
break;
144
145
case INDEX_op_shl_i64:
146
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
147
if (c2) {
148
tcg_out_rotl(s, ext, a0, a1, a2);
149
} else {
150
- tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
151
- tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
152
+ tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
153
+ tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
154
}
155
break;
156
157
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
158
break;
159
}
160
}
161
- tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
162
- a2 = TCG_VEC_TMP;
163
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
164
+ a2 = TCG_VEC_TMP0;
165
}
166
if (is_scalar) {
167
insn = cmp_scalar_insn[cond];
168
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
169
s->reserved_regs = 0;
170
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
171
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
172
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
173
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
174
- tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
175
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
176
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
177
}
178
179
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
241
--
180
--
242
2.20.1
181
2.34.1
243
244
New patch
1
Use LDXP+STXP when LSE2 is not present and 16-byte atomicity is required,
2
and LDP/STP otherwise. This requires allocating a second general-purpose
3
temporary, as Rs cannot overlap Rn in STXP.
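
The reason the loop works, and the reason it needs writable pages, is that a
successful store-exclusive pair proves the preceding load-exclusive pair
observed one consistent 16-byte value. A hedged user-space sketch of the same
ldxp/stxp/cbnz loop the patch emits, written with inline assembly rather than
through the TCG code generator (names are illustrative only):

    #include <stdint.h>

    typedef struct { uint64_t lo, hi; } Pair64;

    /* 16-byte atomic read without FEAT_LSE2: retry until the exclusive
     * store-pair succeeds, i.e. the load-pair was not interfered with. */
    static inline Pair64 atomic16_read_ldxp(Pair64 *p)
    {
        Pair64 r;
        uint32_t fail;

        do {
            asm volatile("ldxp %[lo], %[hi], [%[addr]]\n\t"
                         "stxp %w[fail], %[lo], %[hi], [%[addr]]"
                         : [lo] "=&r" (r.lo), [hi] "=&r" (r.hi),
                           [fail] "=&r" (fail)
                         : [addr] "r" (p)
                         : "memory");
        } while (fail);
        return r;
    }
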
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target-con-set.h | 2 +
9
tcg/aarch64/tcg-target.h | 11 +-
10
tcg/aarch64/tcg-target.c.inc | 179 ++++++++++++++++++++++++++++++-
11
3 files changed, 189 insertions(+), 3 deletions(-)
12
13
diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/aarch64/tcg-target-con-set.h
16
+++ b/tcg/aarch64/tcg-target-con-set.h
17
@@ -XXX,XX +XXX,XX @@ C_O0_I2(lZ, l)
18
C_O0_I2(r, rA)
19
C_O0_I2(rZ, r)
20
C_O0_I2(w, r)
21
+C_O0_I3(lZ, lZ, l)
22
C_O1_I1(r, l)
23
C_O1_I1(r, r)
24
C_O1_I1(w, r)
25
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wO)
26
C_O1_I2(w, w, wZ)
27
C_O1_I3(w, w, w, w)
28
C_O1_I4(r, r, rA, rZ, rZ)
29
+C_O2_I1(r, r, l)
30
C_O2_I4(r, r, rZ, rZ, rA, rMZ)
31
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tcg/aarch64/tcg-target.h
34
+++ b/tcg/aarch64/tcg-target.h
35
@@ -XXX,XX +XXX,XX @@ extern bool have_lse2;
36
#define TCG_TARGET_HAS_muluh_i64 1
37
#define TCG_TARGET_HAS_mulsh_i64 1
38
39
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
40
+/*
41
+ * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
42
+ * which requires writable pages. We must defer to the helper for user-only,
43
+ * but in system mode all ram is writable for the host.
44
+ */
45
+#ifdef CONFIG_USER_ONLY
46
+#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2
47
+#else
48
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
49
+#endif
50
51
#define TCG_TARGET_HAS_v64 1
52
#define TCG_TARGET_HAS_v128 1
53
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
54
index XXXXXXX..XXXXXXX 100644
55
--- a/tcg/aarch64/tcg-target.c.inc
56
+++ b/tcg/aarch64/tcg-target.c.inc
57
@@ -XXX,XX +XXX,XX @@ bool have_lse;
58
bool have_lse2;
59
60
#define TCG_REG_TMP0 TCG_REG_X30
61
+#define TCG_REG_TMP1 TCG_REG_X17
62
#define TCG_VEC_TMP0 TCG_REG_V31
63
64
#ifndef CONFIG_SOFTMMU
65
@@ -XXX,XX +XXX,XX @@ typedef enum {
66
I3305_LDR_v64 = 0x5c000000,
67
I3305_LDR_v128 = 0x9c000000,
68
69
+ /* Load/store exclusive. */
70
+ I3306_LDXP = 0xc8600000,
71
+ I3306_STXP = 0xc8200000,
72
+
73
/* Load/store register. Described here as 3.3.12, but the helper
74
that emits them can transform to 3.3.10 or 3.3.13. */
75
I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
76
@@ -XXX,XX +XXX,XX @@ typedef enum {
77
I3406_ADR = 0x10000000,
78
I3406_ADRP = 0x90000000,
79
80
+ /* Add/subtract extended register instructions. */
81
+ I3501_ADD = 0x0b200000,
82
+
83
/* Add/subtract shifted register instructions (without a shift). */
84
I3502_ADD = 0x0b000000,
85
I3502_ADDS = 0x2b000000,
86
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
87
tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
88
}
89
90
+static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
91
+ TCGReg rt, TCGReg rt2, TCGReg rn)
92
+{
93
+ tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
94
+}
95
+
96
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
97
TCGReg rt, int imm19)
98
{
99
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
100
tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
101
}
102
103
+static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
104
+ TCGType sf, TCGReg rd, TCGReg rn,
105
+ TCGReg rm, int opt, int imm3)
106
+{
107
+ tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
108
+ imm3 << 10 | rn << 5 | rd);
109
+}
110
+
111
/* This function is for both 3.5.2 (Add/Subtract shifted register), for
112
the rare occasion when we actually want to supply a shift amount. */
113
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
114
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
115
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
116
TCGLabelQemuLdst *ldst = NULL;
117
MemOp opc = get_memop(oi);
118
+ MemOp s_bits = opc & MO_SIZE;
119
unsigned a_mask;
120
121
h->aa = atom_and_align_for_opc(s, opc,
122
have_lse2 ? MO_ATOM_WITHIN16
123
: MO_ATOM_IFALIGN,
124
- false);
125
+ s_bits == MO_128);
126
a_mask = (1 << h->aa.align) - 1;
127
128
#ifdef CONFIG_SOFTMMU
129
- unsigned s_bits = opc & MO_SIZE;
130
unsigned s_mask = (1u << s_bits) - 1;
131
unsigned mem_index = get_mmuidx(oi);
132
TCGReg x3;
133
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
134
}
135
}
136
137
+static TCGLabelQemuLdst *
138
+prepare_host_addr_base_only(TCGContext *s, HostAddress *h, TCGReg addr_reg,
139
+ MemOpIdx oi, bool is_ld)
140
+{
141
+ TCGLabelQemuLdst *ldst;
142
+
143
+ ldst = prepare_host_addr(s, h, addr_reg, oi, true);
144
+
145
+ /* Compose the final address, as LDP/STP have no indexing. */
146
+ if (h->index != TCG_REG_XZR) {
147
+ tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, TCG_REG_TMP0,
148
+ h->base, h->index,
149
+ h->index_ext == TCG_TYPE_I32 ? MO_32 : MO_64, 0);
150
+ h->base = TCG_REG_TMP0;
151
+ h->index = TCG_REG_XZR;
152
+ h->index_ext = TCG_TYPE_I64;
153
+ }
154
+
155
+ return ldst;
156
+}
157
+
158
+static void tcg_out_qemu_ld128(TCGContext *s, TCGReg datalo, TCGReg datahi,
159
+ TCGReg addr_reg, MemOpIdx oi)
160
+{
161
+ TCGLabelQemuLdst *ldst;
162
+ HostAddress h;
163
+
164
+ ldst = prepare_host_addr_base_only(s, &h, addr_reg, oi, true);
165
+
166
+ if (h.aa.atom < MO_128 || have_lse2) {
167
+ tcg_out_insn(s, 3314, LDP, datalo, datahi, h.base, 0, 0, 0);
168
+ } else {
169
+ TCGLabel *l0, *l1 = NULL;
170
+
171
+ /*
172
+ * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
173
+ * 1: ldxp lo,hi,[addr]
174
+ * stxp tmp1,lo,hi,[addr]
175
+ * cbnz tmp1, 1b
176
+ *
177
+ * If we have already checked for 16-byte alignment, that's all
178
+ * we need. Otherwise we have determined that misaligned atomicity
179
+ * may be handled with two 8-byte loads.
180
+ */
181
+ if (h.aa.align < MO_128) {
182
+ /*
183
+ * TODO: align should be MO_64, so we only need test bit 3,
184
+ * which means we could use TBNZ instead of AND+CBNZ.
185
+ */
186
+ l1 = gen_new_label();
187
+ tcg_out_logicali(s, I3404_ANDI, 0, TCG_REG_TMP1, addr_reg, 15);
188
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE,
189
+ TCG_REG_TMP1, 0, 1, l1);
190
+ }
191
+
192
+ l0 = gen_new_label();
193
+ tcg_out_label(s, l0);
194
+
195
+ tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, datalo, datahi, h.base);
196
+ tcg_out_insn(s, 3306, STXP, TCG_REG_TMP1, datalo, datahi, h.base);
197
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE, TCG_REG_TMP1, 0, 1, l0);
198
+
199
+ if (l1) {
200
+ TCGLabel *l2 = gen_new_label();
201
+ tcg_out_goto_label(s, l2);
202
+
203
+ tcg_out_label(s, l1);
204
+ tcg_out_insn(s, 3314, LDP, datalo, datahi, h.base, 0, 0, 0);
205
+
206
+ tcg_out_label(s, l2);
207
+ }
208
+ }
209
+
210
+ if (ldst) {
211
+ ldst->type = TCG_TYPE_I128;
212
+ ldst->datalo_reg = datalo;
213
+ ldst->datahi_reg = datahi;
214
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
215
+ }
216
+}
217
+
218
+static void tcg_out_qemu_st128(TCGContext *s, TCGReg datalo, TCGReg datahi,
219
+ TCGReg addr_reg, MemOpIdx oi)
220
+{
221
+ TCGLabelQemuLdst *ldst;
222
+ HostAddress h;
223
+
224
+ ldst = prepare_host_addr_base_only(s, &h, addr_reg, oi, false);
225
+
226
+ if (h.aa.atom < MO_128 || have_lse2) {
227
+ tcg_out_insn(s, 3314, STP, datalo, datahi, h.base, 0, 0, 0);
228
+ } else {
229
+ TCGLabel *l0, *l1 = NULL;
230
+
231
+ /*
232
+ * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
233
+ * 1: ldxp xzr,tmp1,[addr]
234
+ * stxp tmp1,lo,hi,[addr]
235
+ * cbnz tmp1, 1b
236
+ *
237
+ * If we have already checked for 16-byte alignment, that's all
238
+ * we need. Otherwise we have determined that misaligned atomicity
239
+ * may be handled with two 8-byte stores.
240
+ */
241
+ if (h.aa.align < MO_128) {
242
+ /*
243
+ * TODO: align should be MO_64, so we only need test bit 3,
244
+ * which means we could use TBNZ instead of AND+CBNE.
245
+ */
246
+ l1 = gen_new_label();
247
+ tcg_out_logicali(s, I3404_ANDI, 0, TCG_REG_TMP1, addr_reg, 15);
248
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE,
249
+ TCG_REG_TMP1, 0, 1, l1);
250
+ }
251
+
252
+ l0 = gen_new_label();
253
+ tcg_out_label(s, l0);
254
+
255
+ tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR,
256
+ TCG_REG_XZR, TCG_REG_TMP1, h.base);
257
+ tcg_out_insn(s, 3306, STXP, TCG_REG_TMP1, datalo, datahi, h.base);
258
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE, TCG_REG_TMP1, 0, 1, l0);
259
+
260
+ if (l1) {
261
+ TCGLabel *l2 = gen_new_label();
262
+ tcg_out_goto_label(s, l2);
263
+
264
+ tcg_out_label(s, l1);
265
+ tcg_out_insn(s, 3314, STP, datalo, datahi, h.base, 0, 0, 0);
266
+
267
+ tcg_out_label(s, l2);
268
+ }
269
+ }
270
+
271
+ if (ldst) {
272
+ ldst->type = TCG_TYPE_I128;
273
+ ldst->datalo_reg = datalo;
274
+ ldst->datahi_reg = datahi;
275
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
276
+ }
277
+}
278
+
279
static const tcg_insn_unit *tb_ret_addr;
280
281
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
282
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
283
case INDEX_op_qemu_st_i64:
284
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
285
break;
286
+ case INDEX_op_qemu_ld_i128:
287
+ tcg_out_qemu_ld128(s, a0, a1, a2, args[3]);
288
+ break;
289
+ case INDEX_op_qemu_st_i128:
290
+ tcg_out_qemu_st128(s, REG0(0), REG0(1), a2, args[3]);
291
+ break;
292
293
case INDEX_op_bswap64_i64:
294
tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
295
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
296
case INDEX_op_qemu_ld_i32:
297
case INDEX_op_qemu_ld_i64:
298
return C_O1_I1(r, l);
299
+ case INDEX_op_qemu_ld_i128:
300
+ return C_O2_I1(r, r, l);
301
case INDEX_op_qemu_st_i32:
302
case INDEX_op_qemu_st_i64:
303
return C_O0_I2(lZ, l);
304
+ case INDEX_op_qemu_st_i128:
305
+ return C_O0_I3(lZ, lZ, l);
306
307
case INDEX_op_deposit_i32:
308
case INDEX_op_deposit_i64:
309
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
310
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
311
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
312
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
313
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
314
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
315
}
316
317
--
318
2.34.1
New patch
1
Use LQ/STQ with ISA v2.07 when 16-byte atomicity is required.
2
Note that these instructions do not require 16-byte alignment.
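
A side note on the register constraints added below: lq/stq operate on an
even/odd GPR pair, so the new 'o' constraint (ALL_GENERAL_REGS & 0xAAAAAAAAu)
restricts the low half of the pair to odd-numbered registers, leaving the even
partner one below it for the high half, which is exactly what the debug
asserts in the patch check. A tiny demo of how that mask selects registers
(illustration only, not QEMU code):

    #include <stdio.h>

    int main(void)
    {
        /* Bit N set means host register N satisfies the 'o' constraint. */
        unsigned odd_mask = 0xAAAAAAAAu;

        for (int r = 0; r < 8; r++) {
            printf("r%-2d %s\n", r,
                   (odd_mask >> r) & 1 ? "allowed by 'o' (odd)"
                                       : "excluded (even partner)");
        }
        return 0;
    }
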
1
3
4
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/ppc/tcg-target-con-set.h | 2 +
8
tcg/ppc/tcg-target-con-str.h | 1 +
9
tcg/ppc/tcg-target.h | 3 +-
10
tcg/ppc/tcg-target.c.inc | 115 +++++++++++++++++++++++++++++++----
11
4 files changed, 108 insertions(+), 13 deletions(-)
12
13
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/ppc/tcg-target-con-set.h
16
+++ b/tcg/ppc/tcg-target-con-set.h
17
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r)
18
C_O0_I2(r, ri)
19
C_O0_I2(v, r)
20
C_O0_I3(r, r, r)
21
+C_O0_I3(o, m, r)
22
C_O0_I4(r, r, ri, ri)
23
C_O0_I4(r, r, r, r)
24
C_O1_I1(r, r)
25
@@ -XXX,XX +XXX,XX @@ C_O1_I3(v, v, v, v)
26
C_O1_I4(r, r, ri, rZ, rZ)
27
C_O1_I4(r, r, r, ri, ri)
28
C_O2_I1(r, r, r)
29
+C_O2_I1(o, m, r)
30
C_O2_I2(r, r, r, r)
31
C_O2_I4(r, r, rI, rZM, r, r)
32
C_O2_I4(r, r, r, r, rI, rZM)
33
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/tcg/ppc/tcg-target-con-str.h
36
+++ b/tcg/ppc/tcg-target-con-str.h
37
@@ -XXX,XX +XXX,XX @@
38
* REGS(letter, register_mask)
39
*/
40
REGS('r', ALL_GENERAL_REGS)
41
+REGS('o', ALL_GENERAL_REGS & 0xAAAAAAAAu) /* odd registers */
42
REGS('v', ALL_VECTOR_REGS)
43
44
/*
45
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/ppc/tcg-target.h
48
+++ b/tcg/ppc/tcg-target.h
49
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
50
#define TCG_TARGET_HAS_mulsh_i64 1
51
#endif
52
53
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
54
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
55
+ (TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
56
57
/*
58
* While technically Altivec could support V64, it has no 64-bit store
59
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
60
index XXXXXXX..XXXXXXX 100644
61
--- a/tcg/ppc/tcg-target.c.inc
62
+++ b/tcg/ppc/tcg-target.c.inc
63
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
64
65
#define B OPCD( 18)
66
#define BC OPCD( 16)
67
+
68
#define LBZ OPCD( 34)
69
#define LHZ OPCD( 40)
70
#define LHA OPCD( 42)
71
#define LWZ OPCD( 32)
72
#define LWZUX XO31( 55)
73
-#define STB OPCD( 38)
74
-#define STH OPCD( 44)
75
-#define STW OPCD( 36)
76
-
77
-#define STD XO62( 0)
78
-#define STDU XO62( 1)
79
-#define STDX XO31(149)
80
-
81
#define LD XO58( 0)
82
#define LDX XO31( 21)
83
#define LDU XO58( 1)
84
#define LDUX XO31( 53)
85
#define LWA XO58( 2)
86
#define LWAX XO31(341)
87
+#define LQ OPCD( 56)
88
+
89
+#define STB OPCD( 38)
90
+#define STH OPCD( 44)
91
+#define STW OPCD( 36)
92
+#define STD XO62( 0)
93
+#define STDU XO62( 1)
94
+#define STDX XO31(149)
95
+#define STQ XO62( 2)
96
97
#define ADDIC OPCD( 12)
98
#define ADDI OPCD( 14)
99
@@ -XXX,XX +XXX,XX @@ typedef struct {
100
101
bool tcg_target_has_memory_bswap(MemOp memop)
102
{
103
- return true;
104
+ TCGAtomAlign aa;
105
+
106
+ if ((memop & MO_SIZE) <= MO_64) {
107
+ return true;
108
+ }
109
+
110
+ /*
111
+ * Reject 16-byte memop with 16-byte atomicity,
112
+ * but do allow a pair of 64-bit operations.
113
+ */
114
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
115
+ return aa.atom <= MO_64;
116
}
117
118
/*
119
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
120
{
121
TCGLabelQemuLdst *ldst = NULL;
122
MemOp opc = get_memop(oi);
123
- MemOp a_bits;
124
+ MemOp a_bits, s_bits;
125
126
/*
127
* Book II, Section 1.4, Single-Copy Atomicity, specifies:
128
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
129
* As of 3.0, "the non-atomic access is performed as described in
130
* the corresponding list", which matches MO_ATOM_SUBALIGN.
131
*/
132
+ s_bits = opc & MO_SIZE;
133
h->aa = atom_and_align_for_opc(s, opc,
134
have_isa_3_00 ? MO_ATOM_SUBALIGN
135
: MO_ATOM_IFALIGN,
136
- false);
137
+ s_bits == MO_128);
138
a_bits = h->aa.align;
139
140
#ifdef CONFIG_SOFTMMU
141
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
142
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
143
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
144
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
145
- unsigned s_bits = opc & MO_SIZE;
146
147
ldst = new_ldst_label(s);
148
ldst->is_ld = is_ld;
149
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
150
}
151
}
152
153
+static TCGLabelQemuLdst *
154
+prepare_host_addr_index_only(TCGContext *s, HostAddress *h, TCGReg addr_reg,
155
+ MemOpIdx oi, bool is_ld)
156
+{
157
+ TCGLabelQemuLdst *ldst;
158
+
159
+ ldst = prepare_host_addr(s, h, addr_reg, -1, oi, true);
160
+
161
+ /* Compose the final address, as LQ/STQ have no indexing. */
162
+ if (h->base != 0) {
163
+ tcg_out32(s, ADD | TAB(TCG_REG_TMP1, h->base, h->index));
164
+ h->index = TCG_REG_TMP1;
165
+ h->base = 0;
166
+ }
167
+
168
+ return ldst;
169
+}
170
+
171
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
172
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
173
+{
174
+ TCGLabelQemuLdst *ldst;
175
+ HostAddress h;
176
+ bool need_bswap;
177
+ uint32_t insn;
178
+
179
+ ldst = prepare_host_addr_index_only(s, &h, addr_reg, oi, is_ld);
180
+ need_bswap = get_memop(oi) & MO_BSWAP;
181
+
182
+ if (h.aa.atom == MO_128) {
183
+ tcg_debug_assert(!need_bswap);
184
+ tcg_debug_assert(datalo & 1);
185
+ tcg_debug_assert(datahi == datalo - 1);
186
+ insn = is_ld ? LQ : STQ;
187
+ tcg_out32(s, insn | TAI(datahi, h.index, 0));
188
+ } else {
189
+ TCGReg d1, d2;
190
+
191
+ if (HOST_BIG_ENDIAN ^ need_bswap) {
192
+ d1 = datahi, d2 = datalo;
193
+ } else {
194
+ d1 = datalo, d2 = datahi;
195
+ }
196
+
197
+ if (need_bswap) {
198
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
199
+ insn = is_ld ? LDBRX : STDBRX;
200
+ tcg_out32(s, insn | TAB(d1, 0, h.index));
201
+ tcg_out32(s, insn | TAB(d2, h.index, TCG_REG_R0));
202
+ } else {
203
+ insn = is_ld ? LD : STD;
204
+ tcg_out32(s, insn | TAI(d1, h.index, 0));
205
+ tcg_out32(s, insn | TAI(d2, h.index, 8));
206
+ }
207
+ }
208
+
209
+ if (ldst) {
210
+ ldst->type = TCG_TYPE_I128;
211
+ ldst->datalo_reg = datalo;
212
+ ldst->datahi_reg = datahi;
213
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
214
+ }
215
+}
216
+
217
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
218
{
219
int i;
220
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
221
args[4], TCG_TYPE_I64);
222
}
223
break;
224
+ case INDEX_op_qemu_ld_i128:
225
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
226
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
227
+ break;
228
+
229
case INDEX_op_qemu_st_i32:
230
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
231
tcg_out_qemu_st(s, args[0], -1, args[1], -1,
232
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
233
args[4], TCG_TYPE_I64);
234
}
235
break;
236
+ case INDEX_op_qemu_st_i128:
237
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
238
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
239
+ break;
240
241
case INDEX_op_setcond_i32:
242
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
243
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
244
: TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
245
: C_O0_I4(r, r, r, r));
246
247
+ case INDEX_op_qemu_ld_i128:
248
+ return C_O2_I1(o, m, r);
249
+ case INDEX_op_qemu_st_i128:
250
+ return C_O0_I3(o, m, r);
251
+
252
case INDEX_op_add_vec:
253
case INDEX_op_sub_vec:
254
case INDEX_op_mul_vec:
255
--
256
2.34.1
New patch
1
Use LPQ/STPQ when 16-byte atomicity is required.
2
Note that these instructions require 16-byte alignment.
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/s390x/tcg-target-con-set.h | 2 +
8
tcg/s390x/tcg-target.h | 2 +-
9
tcg/s390x/tcg-target.c.inc | 103 ++++++++++++++++++++++++++++++++-
10
3 files changed, 103 insertions(+), 4 deletions(-)
11
12
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/s390x/tcg-target-con-set.h
15
+++ b/tcg/s390x/tcg-target-con-set.h
16
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r)
17
C_O0_I2(r, ri)
18
C_O0_I2(r, rA)
19
C_O0_I2(v, r)
20
+C_O0_I3(o, m, r)
21
C_O1_I1(r, r)
22
C_O1_I1(v, r)
23
C_O1_I1(v, v)
24
@@ -XXX,XX +XXX,XX @@ C_O1_I2(v, v, v)
25
C_O1_I3(v, v, v, v)
26
C_O1_I4(r, r, ri, rI, r)
27
C_O1_I4(r, r, rA, rI, r)
28
+C_O2_I1(o, m, r)
29
C_O2_I2(o, m, 0, r)
30
C_O2_I2(o, m, r, r)
31
C_O2_I3(o, m, 0, 1, r)
32
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/s390x/tcg-target.h
35
+++ b/tcg/s390x/tcg-target.h
36
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
37
#define TCG_TARGET_HAS_muluh_i64 0
38
#define TCG_TARGET_HAS_mulsh_i64 0
39
40
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
41
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
42
43
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
44
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
45
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/s390x/tcg-target.c.inc
48
+++ b/tcg/s390x/tcg-target.c.inc
49
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
50
RXY_LLGF = 0xe316,
51
RXY_LLGH = 0xe391,
52
RXY_LMG = 0xeb04,
53
+ RXY_LPQ = 0xe38f,
54
RXY_LRV = 0xe31e,
55
RXY_LRVG = 0xe30f,
56
RXY_LRVH = 0xe31f,
57
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
58
RXY_STG = 0xe324,
59
RXY_STHY = 0xe370,
60
RXY_STMG = 0xeb24,
61
+ RXY_STPQ = 0xe38e,
62
RXY_STRV = 0xe33e,
63
RXY_STRVG = 0xe32f,
64
RXY_STRVH = 0xe33f,
65
@@ -XXX,XX +XXX,XX @@ typedef struct {
66
67
bool tcg_target_has_memory_bswap(MemOp memop)
68
{
69
- return true;
70
+ TCGAtomAlign aa;
71
+
72
+ if ((memop & MO_SIZE) <= MO_64) {
73
+ return true;
74
+ }
75
+
76
+ /*
77
+ * Reject 16-byte memop with 16-byte atomicity,
78
+ * but do allow a pair of 64-bit operations.
79
+ */
80
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
81
+ return aa.atom <= MO_64;
82
}
83
84
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
85
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
86
{
87
TCGLabelQemuLdst *ldst = NULL;
88
MemOp opc = get_memop(oi);
89
+ MemOp s_bits = opc & MO_SIZE;
90
unsigned a_mask;
91
92
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
93
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
94
a_mask = (1 << h->aa.align) - 1;
95
96
#ifdef CONFIG_SOFTMMU
97
- unsigned s_bits = opc & MO_SIZE;
98
unsigned s_mask = (1 << s_bits) - 1;
99
int mem_index = get_mmuidx(oi);
100
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
101
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
102
}
103
}
104
105
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
106
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
107
+{
108
+ TCGLabel *l1 = NULL, *l2 = NULL;
109
+ TCGLabelQemuLdst *ldst;
110
+ HostAddress h;
111
+ bool need_bswap;
112
+ bool use_pair;
113
+ S390Opcode insn;
114
+
115
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
116
+
117
+ use_pair = h.aa.atom < MO_128;
118
+ need_bswap = get_memop(oi) & MO_BSWAP;
119
+
120
+ if (!use_pair) {
121
+ /*
122
+ * Atomicity requires we use LPQ. If we've already checked for
123
+ * 16-byte alignment, that's all we need. If we arrive with
124
+ * lesser alignment, we have determined that less than 16-byte
125
+ * alignment can be satisfied with two 8-byte loads.
126
+ */
127
+ if (h.aa.align < MO_128) {
128
+ use_pair = true;
129
+ l1 = gen_new_label();
130
+ l2 = gen_new_label();
131
+
132
+ tcg_out_insn(s, RI, TMLL, addr_reg, 15);
133
+ tgen_branch(s, 7, l1); /* CC in {1,2,3} */
134
+ }
135
+
136
+ tcg_debug_assert(!need_bswap);
137
+ tcg_debug_assert(datalo & 1);
138
+ tcg_debug_assert(datahi == datalo - 1);
139
+ insn = is_ld ? RXY_LPQ : RXY_STPQ;
140
+ tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);
141
+
142
+ if (use_pair) {
143
+ tgen_branch(s, S390_CC_ALWAYS, l2);
144
+ tcg_out_label(s, l1);
145
+ }
146
+ }
147
+ if (use_pair) {
148
+ TCGReg d1, d2;
149
+
150
+ if (need_bswap) {
151
+ d1 = datalo, d2 = datahi;
152
+ insn = is_ld ? RXY_LRVG : RXY_STRVG;
153
+ } else {
154
+ d1 = datahi, d2 = datalo;
155
+ insn = is_ld ? RXY_LG : RXY_STG;
156
+ }
157
+
158
+ if (h.base == d1 || h.index == d1) {
159
+ tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
160
+ h.base = TCG_TMP0;
161
+ h.index = TCG_REG_NONE;
162
+ h.disp = 0;
163
+ }
164
+ tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
165
+ tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
166
+ }
167
+ if (l2) {
168
+ tcg_out_label(s, l2);
169
+ }
170
+
171
+ if (ldst) {
172
+ ldst->type = TCG_TYPE_I128;
173
+ ldst->datalo_reg = datalo;
174
+ ldst->datahi_reg = datahi;
175
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
176
+ }
177
+}
178
+
179
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
180
{
181
/* Reuse the zeroing that exists for goto_ptr. */
182
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
183
case INDEX_op_qemu_st_i64:
184
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
185
break;
186
+ case INDEX_op_qemu_ld_i128:
187
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
188
+ break;
189
+ case INDEX_op_qemu_st_i128:
190
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
191
+ break;
192
193
case INDEX_op_ld16s_i64:
194
tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
195
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
196
case INDEX_op_qemu_st_i64:
197
case INDEX_op_qemu_st_i32:
198
return C_O0_I2(r, r);
199
+ case INDEX_op_qemu_ld_i128:
200
+ return C_O2_I1(o, m, r);
201
+ case INDEX_op_qemu_st_i128:
202
+ return C_O0_I3(o, m, r);
203
204
case INDEX_op_deposit_i32:
205
case INDEX_op_deposit_i64:
206
--
207
2.34.1
New patch
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/tcg-op-ldst.c | 1006 +++++++++++++++++++++++++++++++++++++++++++++
5
tcg/tcg-op.c | 974 -------------------------------------------
6
tcg/meson.build | 1 +
7
3 files changed, 1007 insertions(+), 974 deletions(-)
8
create mode 100644 tcg/tcg-op-ldst.c
1
9
10
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
11
new file mode 100644
12
index XXXXXXX..XXXXXXX
13
--- /dev/null
14
+++ b/tcg/tcg-op-ldst.c
15
@@ -XXX,XX +XXX,XX @@
16
+/*
17
+ * Tiny Code Generator for QEMU
18
+ *
19
+ * Copyright (c) 2008 Fabrice Bellard
20
+ *
21
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
22
+ * of this software and associated documentation files (the "Software"), to deal
23
+ * in the Software without restriction, including without limitation the rights
24
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25
+ * copies of the Software, and to permit persons to whom the Software is
26
+ * furnished to do so, subject to the following conditions:
27
+ *
28
+ * The above copyright notice and this permission notice shall be included in
29
+ * all copies or substantial portions of the Software.
30
+ *
31
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
34
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
37
+ * THE SOFTWARE.
38
+ */
39
+
40
+#include "qemu/osdep.h"
41
+#include "exec/exec-all.h"
42
+#include "tcg/tcg.h"
43
+#include "tcg/tcg-temp-internal.h"
44
+#include "tcg/tcg-op.h"
45
+#include "tcg/tcg-mo.h"
46
+#include "exec/plugin-gen.h"
47
+#include "tcg-internal.h"
48
+
49
+
50
+static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
51
+{
52
+ /* Trigger the asserts within as early as possible. */
53
+ unsigned a_bits = get_alignment_bits(op);
54
+
55
+ /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
56
+ if (a_bits == (op & MO_SIZE)) {
57
+ op = (op & ~MO_AMASK) | MO_ALIGN;
58
+ }
59
+
60
+ switch (op & MO_SIZE) {
61
+ case MO_8:
62
+ op &= ~MO_BSWAP;
63
+ break;
64
+ case MO_16:
65
+ break;
66
+ case MO_32:
67
+ if (!is64) {
68
+ op &= ~MO_SIGN;
69
+ }
70
+ break;
71
+ case MO_64:
72
+ if (is64) {
73
+ op &= ~MO_SIGN;
74
+ break;
75
+ }
76
+ /* fall through */
77
+ default:
78
+ g_assert_not_reached();
79
+ }
80
+ if (st) {
81
+ op &= ~MO_SIGN;
82
+ }
83
+ return op;
84
+}
85
+
86
+static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
87
+ MemOp memop, TCGArg idx)
88
+{
89
+ MemOpIdx oi = make_memop_idx(memop, idx);
90
+#if TARGET_LONG_BITS == 32
91
+ tcg_gen_op3i_i32(opc, val, addr, oi);
92
+#else
93
+ if (TCG_TARGET_REG_BITS == 32) {
94
+ tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
95
+ } else {
96
+ tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
97
+ }
98
+#endif
99
+}
100
+
101
+static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
102
+ MemOp memop, TCGArg idx)
103
+{
104
+ MemOpIdx oi = make_memop_idx(memop, idx);
105
+#if TARGET_LONG_BITS == 32
106
+ if (TCG_TARGET_REG_BITS == 32) {
107
+ tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
108
+ } else {
109
+ tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
110
+ }
111
+#else
112
+ if (TCG_TARGET_REG_BITS == 32) {
113
+ tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
114
+ TCGV_LOW(addr), TCGV_HIGH(addr), oi);
115
+ } else {
116
+ tcg_gen_op3i_i64(opc, val, addr, oi);
117
+ }
118
+#endif
119
+}
120
+
121
+static void tcg_gen_req_mo(TCGBar type)
122
+{
123
+#ifdef TCG_GUEST_DEFAULT_MO
124
+ type &= TCG_GUEST_DEFAULT_MO;
125
+#endif
126
+ type &= ~TCG_TARGET_DEFAULT_MO;
127
+ if (type) {
128
+ tcg_gen_mb(type | TCG_BAR_SC);
129
+ }
130
+}
131
+
132
+static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
133
+{
134
+#ifdef CONFIG_PLUGIN
135
+ if (tcg_ctx->plugin_insn != NULL) {
136
+ /* Save a copy of the vaddr for use after a load. */
137
+ TCGv temp = tcg_temp_new();
138
+ tcg_gen_mov_tl(temp, vaddr);
139
+ return temp;
140
+ }
141
+#endif
142
+ return vaddr;
143
+}
144
+
145
+static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
146
+ enum qemu_plugin_mem_rw rw)
147
+{
148
+#ifdef CONFIG_PLUGIN
149
+ if (tcg_ctx->plugin_insn != NULL) {
150
+ qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
151
+ plugin_gen_empty_mem_callback(vaddr, info);
152
+ tcg_temp_free(vaddr);
153
+ }
154
+#endif
155
+}
156
+
157
+void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
158
+{
159
+ MemOp orig_memop;
160
+ MemOpIdx oi;
161
+
162
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
163
+ memop = tcg_canonicalize_memop(memop, 0, 0);
164
+ oi = make_memop_idx(memop, idx);
165
+
166
+ orig_memop = memop;
167
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
168
+ memop &= ~MO_BSWAP;
169
+ /* The bswap primitive benefits from zero-extended input. */
170
+ if ((memop & MO_SSIZE) == MO_SW) {
171
+ memop &= ~MO_SIGN;
172
+ }
173
+ }
174
+
175
+ addr = plugin_prep_mem_callbacks(addr);
176
+ gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
177
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
178
+
179
+ if ((orig_memop ^ memop) & MO_BSWAP) {
180
+ switch (orig_memop & MO_SIZE) {
181
+ case MO_16:
182
+ tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
183
+ ? TCG_BSWAP_IZ | TCG_BSWAP_OS
184
+ : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
185
+ break;
186
+ case MO_32:
187
+ tcg_gen_bswap32_i32(val, val);
188
+ break;
189
+ default:
190
+ g_assert_not_reached();
191
+ }
192
+ }
193
+}
194
+
195
+void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
196
+{
197
+ TCGv_i32 swap = NULL;
198
+ MemOpIdx oi;
199
+
200
+ tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
201
+ memop = tcg_canonicalize_memop(memop, 0, 1);
202
+ oi = make_memop_idx(memop, idx);
203
+
204
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
205
+ swap = tcg_temp_ebb_new_i32();
206
+ switch (memop & MO_SIZE) {
207
+ case MO_16:
208
+ tcg_gen_bswap16_i32(swap, val, 0);
209
+ break;
210
+ case MO_32:
211
+ tcg_gen_bswap32_i32(swap, val);
212
+ break;
213
+ default:
214
+ g_assert_not_reached();
215
+ }
216
+ val = swap;
217
+ memop &= ~MO_BSWAP;
218
+ }
219
+
220
+ addr = plugin_prep_mem_callbacks(addr);
221
+ if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
222
+ gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
223
+ } else {
224
+ gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
225
+ }
226
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
227
+
228
+ if (swap) {
229
+ tcg_temp_free_i32(swap);
230
+ }
231
+}
232
+
233
+void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
234
+{
235
+ MemOp orig_memop;
236
+ MemOpIdx oi;
237
+
238
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
239
+ tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
240
+ if (memop & MO_SIGN) {
241
+ tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
242
+ } else {
243
+ tcg_gen_movi_i32(TCGV_HIGH(val), 0);
244
+ }
245
+ return;
246
+ }
247
+
248
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
249
+ memop = tcg_canonicalize_memop(memop, 1, 0);
250
+ oi = make_memop_idx(memop, idx);
251
+
252
+ orig_memop = memop;
253
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
254
+ memop &= ~MO_BSWAP;
255
+ /* The bswap primitive benefits from zero-extended input. */
256
+ if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
257
+ memop &= ~MO_SIGN;
258
+ }
259
+ }
260
+
261
+ addr = plugin_prep_mem_callbacks(addr);
262
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
263
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
264
+
265
+ if ((orig_memop ^ memop) & MO_BSWAP) {
266
+ int flags = (orig_memop & MO_SIGN
267
+ ? TCG_BSWAP_IZ | TCG_BSWAP_OS
268
+ : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
269
+ switch (orig_memop & MO_SIZE) {
270
+ case MO_16:
271
+ tcg_gen_bswap16_i64(val, val, flags);
272
+ break;
273
+ case MO_32:
274
+ tcg_gen_bswap32_i64(val, val, flags);
275
+ break;
276
+ case MO_64:
277
+ tcg_gen_bswap64_i64(val, val);
278
+ break;
279
+ default:
280
+ g_assert_not_reached();
281
+ }
282
+ }
283
+}
284
+
285
+void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
286
+{
287
+ TCGv_i64 swap = NULL;
288
+ MemOpIdx oi;
289
+
290
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
291
+ tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
292
+ return;
293
+ }
294
+
295
+ tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
296
+ memop = tcg_canonicalize_memop(memop, 1, 1);
297
+ oi = make_memop_idx(memop, idx);
298
+
299
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
300
+ swap = tcg_temp_ebb_new_i64();
301
+ switch (memop & MO_SIZE) {
302
+ case MO_16:
303
+ tcg_gen_bswap16_i64(swap, val, 0);
304
+ break;
305
+ case MO_32:
306
+ tcg_gen_bswap32_i64(swap, val, 0);
307
+ break;
308
+ case MO_64:
309
+ tcg_gen_bswap64_i64(swap, val);
310
+ break;
311
+ default:
312
+ g_assert_not_reached();
313
+ }
314
+ val = swap;
315
+ memop &= ~MO_BSWAP;
316
+ }
317
+
318
+ addr = plugin_prep_mem_callbacks(addr);
319
+ gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
320
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
321
+
322
+ if (swap) {
323
+ tcg_temp_free_i64(swap);
324
+ }
325
+}
326
+
327
+/*
328
+ * Return true if @mop, without knowledge of the pointer alignment,
329
+ * does not require 16-byte atomicity, and it would be advantageous
330
+ * to avoid a call to a helper function.
331
+ */
332
+static bool use_two_i64_for_i128(MemOp mop)
333
+{
334
+#ifdef CONFIG_SOFTMMU
335
+ /* Two softmmu tlb lookups is larger than one function call. */
336
+ return false;
337
+#else
338
+ /*
339
+ * For user-only, two 64-bit operations may well be smaller than a call.
340
+ * Determine if that would be legal for the requested atomicity.
341
+ */
342
+ switch (mop & MO_ATOM_MASK) {
343
+ case MO_ATOM_NONE:
344
+ case MO_ATOM_IFALIGN_PAIR:
345
+ return true;
346
+ case MO_ATOM_IFALIGN:
347
+ case MO_ATOM_SUBALIGN:
348
+ case MO_ATOM_WITHIN16:
349
+ case MO_ATOM_WITHIN16_PAIR:
350
+ /* In a serialized context, no atomicity is required. */
351
+ return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
352
+ default:
353
+ g_assert_not_reached();
354
+ }
355
+#endif
356
+}
357
+
358
+static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
359
+{
360
+ MemOp mop_1 = orig, mop_2;
361
+
362
+ tcg_debug_assert((orig & MO_SIZE) == MO_128);
363
+ tcg_debug_assert((orig & MO_SIGN) == 0);
364
+
365
+ /* Reduce the size to 64-bit. */
366
+ mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
367
+
368
+ /* Retain the alignment constraints of the original. */
369
+ switch (orig & MO_AMASK) {
370
+ case MO_UNALN:
371
+ case MO_ALIGN_2:
372
+ case MO_ALIGN_4:
373
+ mop_2 = mop_1;
374
+ break;
375
+ case MO_ALIGN_8:
376
+ /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
377
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
378
+ mop_2 = mop_1;
379
+ break;
380
+ case MO_ALIGN:
381
+ /* Second has 8-byte alignment; first has 16-byte alignment. */
382
+ mop_2 = mop_1;
383
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
384
+ break;
385
+ case MO_ALIGN_16:
386
+ case MO_ALIGN_32:
387
+ case MO_ALIGN_64:
388
+ /* Second has 8-byte alignment; first retains original. */
389
+ mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
390
+ break;
391
+ default:
392
+ g_assert_not_reached();
393
+ }
394
+
395
+ /* Use a memory ordering implemented by the host. */
396
+ if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
397
+ mop_1 &= ~MO_BSWAP;
398
+ mop_2 &= ~MO_BSWAP;
399
+ }
400
+
401
+ ret[0] = mop_1;
402
+ ret[1] = mop_2;
403
+}
404
+
405
+#if TARGET_LONG_BITS == 64
406
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
407
+#else
408
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
409
+#endif
410
+
411
+void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
412
+{
413
+ const MemOpIdx oi = make_memop_idx(memop, idx);
414
+
415
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
416
+ tcg_debug_assert((memop & MO_SIGN) == 0);
417
+
418
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
419
+ addr = plugin_prep_mem_callbacks(addr);
420
+
421
+ /* TODO: For now, force 32-bit hosts to use the helper. */
422
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
423
+ TCGv_i64 lo, hi;
424
+ TCGArg addr_arg;
425
+ MemOpIdx adj_oi;
426
+ bool need_bswap = false;
427
+
428
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
429
+ lo = TCGV128_HIGH(val);
430
+ hi = TCGV128_LOW(val);
431
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
432
+ need_bswap = true;
433
+ } else {
434
+ lo = TCGV128_LOW(val);
435
+ hi = TCGV128_HIGH(val);
436
+ adj_oi = oi;
437
+ }
438
+
439
+#if TARGET_LONG_BITS == 32
440
+ addr_arg = tcgv_i32_arg(addr);
441
+#else
442
+ addr_arg = tcgv_i64_arg(addr);
443
+#endif
444
+ tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
445
+
446
+ if (need_bswap) {
447
+ tcg_gen_bswap64_i64(lo, lo);
448
+ tcg_gen_bswap64_i64(hi, hi);
449
+ }
450
+ } else if (use_two_i64_for_i128(memop)) {
451
+ MemOp mop[2];
452
+ TCGv addr_p8;
453
+ TCGv_i64 x, y;
454
+
455
+ canonicalize_memop_i128_as_i64(mop, memop);
456
+
457
+ /*
458
+ * Since there are no global TCGv_i128, there is no visible state
459
+ * changed if the second load faults. Load directly into the two
460
+ * subwords.
461
+ */
462
+ if ((memop & MO_BSWAP) == MO_LE) {
463
+ x = TCGV128_LOW(val);
464
+ y = TCGV128_HIGH(val);
465
+ } else {
466
+ x = TCGV128_HIGH(val);
467
+ y = TCGV128_LOW(val);
468
+ }
469
+
470
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
471
+
472
+ if ((mop[0] ^ memop) & MO_BSWAP) {
473
+ tcg_gen_bswap64_i64(x, x);
474
+ }
475
+
476
+ addr_p8 = tcg_temp_ebb_new();
477
+ tcg_gen_addi_tl(addr_p8, addr, 8);
478
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
479
+ tcg_temp_free(addr_p8);
480
+
481
+ if ((mop[0] ^ memop) & MO_BSWAP) {
482
+ tcg_gen_bswap64_i64(y, y);
483
+ }
484
+ } else {
485
+ gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
486
+ }
487
+
488
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
489
+}
490
+
491
+void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
492
+{
493
+ const MemOpIdx oi = make_memop_idx(memop, idx);
494
+
495
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
496
+ tcg_debug_assert((memop & MO_SIGN) == 0);
497
+
498
+ tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
499
+ addr = plugin_prep_mem_callbacks(addr);
500
+
501
+ /* TODO: For now, force 32-bit hosts to use the helper. */
502
+
503
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
504
+ TCGv_i64 lo, hi;
505
+ TCGArg addr_arg;
506
+ MemOpIdx adj_oi;
507
+ bool need_bswap = false;
508
+
509
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
510
+ lo = tcg_temp_new_i64();
511
+ hi = tcg_temp_new_i64();
512
+ tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
513
+ tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
514
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
515
+ need_bswap = true;
516
+ } else {
517
+ lo = TCGV128_LOW(val);
518
+ hi = TCGV128_HIGH(val);
519
+ adj_oi = oi;
520
+ }
521
+
522
+#if TARGET_LONG_BITS == 32
523
+ addr_arg = tcgv_i32_arg(addr);
524
+#else
525
+ addr_arg = tcgv_i64_arg(addr);
526
+#endif
527
+ tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
528
+
529
+ if (need_bswap) {
530
+ tcg_temp_free_i64(lo);
531
+ tcg_temp_free_i64(hi);
532
+ }
533
+ } else if (use_two_i64_for_i128(memop)) {
534
+ MemOp mop[2];
535
+ TCGv addr_p8;
536
+ TCGv_i64 x, y;
537
+
538
+ canonicalize_memop_i128_as_i64(mop, memop);
539
+
540
+ if ((memop & MO_BSWAP) == MO_LE) {
541
+ x = TCGV128_LOW(val);
542
+ y = TCGV128_HIGH(val);
543
+ } else {
544
+ x = TCGV128_HIGH(val);
545
+ y = TCGV128_LOW(val);
546
+ }
547
+
548
+ addr_p8 = tcg_temp_ebb_new();
549
+ if ((mop[0] ^ memop) & MO_BSWAP) {
550
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
551
+
552
+ tcg_gen_bswap64_i64(t, x);
553
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
554
+ tcg_gen_bswap64_i64(t, y);
555
+ tcg_gen_addi_tl(addr_p8, addr, 8);
556
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
557
+ tcg_temp_free_i64(t);
558
+ } else {
559
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
560
+ tcg_gen_addi_tl(addr_p8, addr, 8);
561
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
562
+ }
563
+ tcg_temp_free(addr_p8);
564
+ } else {
565
+ gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
566
+ }
567
+
568
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
569
+}
570
+
571
+static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
572
+{
573
+ switch (opc & MO_SSIZE) {
574
+ case MO_SB:
575
+ tcg_gen_ext8s_i32(ret, val);
576
+ break;
577
+ case MO_UB:
578
+ tcg_gen_ext8u_i32(ret, val);
579
+ break;
580
+ case MO_SW:
581
+ tcg_gen_ext16s_i32(ret, val);
582
+ break;
583
+ case MO_UW:
584
+ tcg_gen_ext16u_i32(ret, val);
585
+ break;
586
+ default:
587
+ tcg_gen_mov_i32(ret, val);
588
+ break;
589
+ }
590
+}
591
+
592
+static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
593
+{
594
+ switch (opc & MO_SSIZE) {
595
+ case MO_SB:
596
+ tcg_gen_ext8s_i64(ret, val);
597
+ break;
598
+ case MO_UB:
599
+ tcg_gen_ext8u_i64(ret, val);
600
+ break;
601
+ case MO_SW:
602
+ tcg_gen_ext16s_i64(ret, val);
603
+ break;
604
+ case MO_UW:
605
+ tcg_gen_ext16u_i64(ret, val);
606
+ break;
607
+ case MO_SL:
608
+ tcg_gen_ext32s_i64(ret, val);
609
+ break;
610
+ case MO_UL:
611
+ tcg_gen_ext32u_i64(ret, val);
612
+ break;
613
+ default:
614
+ tcg_gen_mov_i64(ret, val);
615
+ break;
616
+ }
617
+}
618
+
619
+typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
620
+ TCGv_i32, TCGv_i32, TCGv_i32);
621
+typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
622
+ TCGv_i64, TCGv_i64, TCGv_i32);
623
+typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
624
+ TCGv_i128, TCGv_i128, TCGv_i32);
625
+typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
626
+ TCGv_i32, TCGv_i32);
627
+typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
628
+ TCGv_i64, TCGv_i32);
629
+
630
+#ifdef CONFIG_ATOMIC64
631
+# define WITH_ATOMIC64(X) X,
632
+#else
633
+# define WITH_ATOMIC64(X)
634
+#endif
635
+#ifdef CONFIG_CMPXCHG128
636
+# define WITH_ATOMIC128(X) X,
637
+#else
638
+# define WITH_ATOMIC128(X)
639
+#endif
640
+
641
+static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
642
+ [MO_8] = gen_helper_atomic_cmpxchgb,
643
+ [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
644
+ [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
645
+ [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
646
+ [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
647
+ WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
648
+ WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
649
+ WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
650
+ WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
651
+};
652
+
653
+void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
654
+ TCGv_i32 newv, TCGArg idx, MemOp memop)
655
+{
656
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
657
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
658
+
659
+ tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
660
+
661
+ tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
662
+ tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
663
+ tcg_gen_qemu_st_i32(t2, addr, idx, memop);
664
+ tcg_temp_free_i32(t2);
665
+
666
+ if (memop & MO_SIGN) {
667
+ tcg_gen_ext_i32(retv, t1, memop);
668
+ } else {
669
+ tcg_gen_mov_i32(retv, t1);
670
+ }
671
+ tcg_temp_free_i32(t1);
672
+}
673
+
674
+void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
675
+ TCGv_i32 newv, TCGArg idx, MemOp memop)
676
+{
677
+ gen_atomic_cx_i32 gen;
678
+ MemOpIdx oi;
679
+
680
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
681
+ tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
682
+ return;
683
+ }
684
+
685
+ memop = tcg_canonicalize_memop(memop, 0, 0);
686
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
687
+ tcg_debug_assert(gen != NULL);
688
+
689
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
690
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
691
+
692
+ if (memop & MO_SIGN) {
693
+ tcg_gen_ext_i32(retv, retv, memop);
694
+ }
695
+}
696
+
697
+void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
698
+ TCGv_i64 newv, TCGArg idx, MemOp memop)
699
+{
700
+ TCGv_i64 t1, t2;
701
+
702
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
703
+ tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
704
+ TCGV_LOW(newv), idx, memop);
705
+ if (memop & MO_SIGN) {
706
+ tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
707
+ } else {
708
+ tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
709
+ }
710
+ return;
711
+ }
712
+
713
+ t1 = tcg_temp_ebb_new_i64();
714
+ t2 = tcg_temp_ebb_new_i64();
715
+
716
+ tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
717
+
718
+ tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
719
+ tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
720
+ tcg_gen_qemu_st_i64(t2, addr, idx, memop);
721
+ tcg_temp_free_i64(t2);
722
+
723
+ if (memop & MO_SIGN) {
724
+ tcg_gen_ext_i64(retv, t1, memop);
725
+ } else {
726
+ tcg_gen_mov_i64(retv, t1);
727
+ }
728
+ tcg_temp_free_i64(t1);
729
+}
730
+
731
+void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
732
+ TCGv_i64 newv, TCGArg idx, MemOp memop)
733
+{
734
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
735
+ tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
736
+ return;
737
+ }
738
+
739
+ if ((memop & MO_SIZE) == MO_64) {
740
+ gen_atomic_cx_i64 gen;
741
+
742
+ memop = tcg_canonicalize_memop(memop, 1, 0);
743
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
744
+ if (gen) {
745
+ MemOpIdx oi = make_memop_idx(memop, idx);
746
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
747
+ return;
748
+ }
749
+
750
+ gen_helper_exit_atomic(cpu_env);
751
+
752
+ /*
753
+ * Produce a result for a well-formed opcode stream. This satisfies
754
+ * liveness for set before used, which happens before this dead code
755
+ * is removed.
756
+ */
757
+ tcg_gen_movi_i64(retv, 0);
758
+ return;
759
+ }
760
+
761
+ if (TCG_TARGET_REG_BITS == 32) {
762
+ tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
763
+ TCGV_LOW(newv), idx, memop);
764
+ if (memop & MO_SIGN) {
765
+ tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
766
+ } else {
767
+ tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
768
+ }
769
+ } else {
770
+ TCGv_i32 c32 = tcg_temp_ebb_new_i32();
771
+ TCGv_i32 n32 = tcg_temp_ebb_new_i32();
772
+ TCGv_i32 r32 = tcg_temp_ebb_new_i32();
773
+
774
+ tcg_gen_extrl_i64_i32(c32, cmpv);
775
+ tcg_gen_extrl_i64_i32(n32, newv);
776
+ tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
777
+ tcg_temp_free_i32(c32);
778
+ tcg_temp_free_i32(n32);
779
+
780
+ tcg_gen_extu_i32_i64(retv, r32);
781
+ tcg_temp_free_i32(r32);
782
+
783
+ if (memop & MO_SIGN) {
784
+ tcg_gen_ext_i64(retv, retv, memop);
785
+ }
786
+ }
787
+}
788
+
789
+void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
790
+ TCGv_i128 newv, TCGArg idx, MemOp memop)
791
+{
792
+ if (TCG_TARGET_REG_BITS == 32) {
793
+ /* Inline expansion below is simply too large for 32-bit hosts. */
794
+ gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
795
+ ? gen_helper_nonatomic_cmpxchgo_le
796
+ : gen_helper_nonatomic_cmpxchgo_be);
797
+ MemOpIdx oi = make_memop_idx(memop, idx);
798
+
799
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
800
+ tcg_debug_assert((memop & MO_SIGN) == 0);
801
+
802
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
803
+ } else {
804
+ TCGv_i128 oldv = tcg_temp_ebb_new_i128();
805
+ TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
806
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
807
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
808
+ TCGv_i64 z = tcg_constant_i64(0);
809
+
810
+ tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
811
+
812
+ /* Compare i128 */
813
+ tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
814
+ tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
815
+ tcg_gen_or_i64(t0, t0, t1);
816
+
817
+ /* tmpv = equal ? newv : oldv */
818
+ tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
819
+ TCGV128_LOW(newv), TCGV128_LOW(oldv));
820
+ tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
821
+ TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
822
+
823
+ /* Unconditional writeback. */
824
+ tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
825
+ tcg_gen_mov_i128(retv, oldv);
826
+
827
+ tcg_temp_free_i64(t0);
828
+ tcg_temp_free_i64(t1);
829
+ tcg_temp_free_i128(tmpv);
830
+ tcg_temp_free_i128(oldv);
831
+ }
832
+}
833
+
834
+void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
835
+ TCGv_i128 newv, TCGArg idx, MemOp memop)
836
+{
837
+ gen_atomic_cx_i128 gen;
838
+
839
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
840
+ tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
841
+ return;
842
+ }
843
+
844
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
845
+ tcg_debug_assert((memop & MO_SIGN) == 0);
846
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
847
+
848
+ if (gen) {
849
+ MemOpIdx oi = make_memop_idx(memop, idx);
850
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
851
+ return;
852
+ }
853
+
854
+ gen_helper_exit_atomic(cpu_env);
855
+
856
+ /*
857
+ * Produce a result for a well-formed opcode stream. This satisfies
858
+ * liveness for set before used, which happens before this dead code
859
+ * is removed.
860
+ */
861
+ tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
862
+ tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
863
+}
864
+
865
+static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
866
+ TCGArg idx, MemOp memop, bool new_val,
867
+ void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
868
+{
869
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
870
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
871
+
872
+ memop = tcg_canonicalize_memop(memop, 0, 0);
873
+
874
+ tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
875
+ tcg_gen_ext_i32(t2, val, memop);
876
+ gen(t2, t1, t2);
877
+ tcg_gen_qemu_st_i32(t2, addr, idx, memop);
878
+
879
+ tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
880
+ tcg_temp_free_i32(t1);
881
+ tcg_temp_free_i32(t2);
882
+}
883
+
884
+static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
885
+ TCGArg idx, MemOp memop, void * const table[])
886
+{
887
+ gen_atomic_op_i32 gen;
888
+ MemOpIdx oi;
889
+
890
+ memop = tcg_canonicalize_memop(memop, 0, 0);
891
+
892
+ gen = table[memop & (MO_SIZE | MO_BSWAP)];
893
+ tcg_debug_assert(gen != NULL);
894
+
895
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
896
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
897
+
898
+ if (memop & MO_SIGN) {
899
+ tcg_gen_ext_i32(ret, ret, memop);
900
+ }
901
+}
902
+
903
+static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
904
+ TCGArg idx, MemOp memop, bool new_val,
905
+ void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
906
+{
907
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
908
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
909
+
910
+ memop = tcg_canonicalize_memop(memop, 1, 0);
911
+
912
+ tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
913
+ tcg_gen_ext_i64(t2, val, memop);
914
+ gen(t2, t1, t2);
915
+ tcg_gen_qemu_st_i64(t2, addr, idx, memop);
916
+
917
+ tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
918
+ tcg_temp_free_i64(t1);
919
+ tcg_temp_free_i64(t2);
920
+}
921
+
922
+static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
923
+ TCGArg idx, MemOp memop, void * const table[])
924
+{
925
+ memop = tcg_canonicalize_memop(memop, 1, 0);
926
+
927
+ if ((memop & MO_SIZE) == MO_64) {
928
+#ifdef CONFIG_ATOMIC64
929
+ gen_atomic_op_i64 gen;
930
+ MemOpIdx oi;
931
+
932
+ gen = table[memop & (MO_SIZE | MO_BSWAP)];
933
+ tcg_debug_assert(gen != NULL);
934
+
935
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
936
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
937
+#else
938
+ gen_helper_exit_atomic(cpu_env);
939
+ /* Produce a result, so that we have a well-formed opcode stream
940
+ with respect to uses of the result in the (dead) code following. */
941
+ tcg_gen_movi_i64(ret, 0);
942
+#endif /* CONFIG_ATOMIC64 */
943
+ } else {
944
+ TCGv_i32 v32 = tcg_temp_ebb_new_i32();
945
+ TCGv_i32 r32 = tcg_temp_ebb_new_i32();
946
+
947
+ tcg_gen_extrl_i64_i32(v32, val);
948
+ do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
949
+ tcg_temp_free_i32(v32);
950
+
951
+ tcg_gen_extu_i32_i64(ret, r32);
952
+ tcg_temp_free_i32(r32);
953
+
954
+ if (memop & MO_SIGN) {
955
+ tcg_gen_ext_i64(ret, ret, memop);
956
+ }
957
+ }
958
+}
959
+
960
+#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
961
+static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
962
+ [MO_8] = gen_helper_atomic_##NAME##b, \
963
+ [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
964
+ [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
965
+ [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \
966
+ [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \
967
+ WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
968
+ WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
969
+}; \
970
+void tcg_gen_atomic_##NAME##_i32 \
971
+ (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
972
+{ \
973
+ if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
974
+ do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
975
+ } else { \
976
+ do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \
977
+ tcg_gen_##OP##_i32); \
978
+ } \
979
+} \
980
+void tcg_gen_atomic_##NAME##_i64 \
981
+ (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
982
+{ \
983
+ if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
984
+ do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
985
+ } else { \
986
+ do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \
987
+ tcg_gen_##OP##_i64); \
988
+ } \
989
+}
990
+
991
+GEN_ATOMIC_HELPER(fetch_add, add, 0)
992
+GEN_ATOMIC_HELPER(fetch_and, and, 0)
993
+GEN_ATOMIC_HELPER(fetch_or, or, 0)
994
+GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
995
+GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
996
+GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
997
+GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
998
+GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
999
+
1000
+GEN_ATOMIC_HELPER(add_fetch, add, 1)
1001
+GEN_ATOMIC_HELPER(and_fetch, and, 1)
1002
+GEN_ATOMIC_HELPER(or_fetch, or, 1)
1003
+GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
1004
+GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
1005
+GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
1006
+GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
1007
+GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
1008
+
1009
+static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
1010
+{
1011
+ tcg_gen_mov_i32(r, b);
1012
+}
1013
+
1014
+static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
1015
+{
1016
+ tcg_gen_mov_i64(r, b);
1017
+}
1018
+
1019
+GEN_ATOMIC_HELPER(xchg, mov2, 0)
1020
+
1021
+#undef GEN_ATOMIC_HELPER
1022
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
1023
index XXXXXXX..XXXXXXX 100644
1024
--- a/tcg/tcg-op.c
1025
+++ b/tcg/tcg-op.c
1026
@@ -XXX,XX +XXX,XX @@
1027
#include "tcg/tcg.h"
1028
#include "tcg/tcg-temp-internal.h"
1029
#include "tcg/tcg-op.h"
1030
-#include "tcg/tcg-mo.h"
1031
#include "exec/plugin-gen.h"
1032
#include "tcg-internal.h"
1033
1034
@@ -XXX,XX +XXX,XX @@ void tcg_gen_lookup_and_goto_ptr(void)
1035
tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
1036
tcg_temp_free_ptr(ptr);
1037
}
1038
-
1039
-static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
1040
-{
1041
- /* Trigger the asserts within as early as possible. */
1042
- unsigned a_bits = get_alignment_bits(op);
1043
-
1044
- /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
1045
- if (a_bits == (op & MO_SIZE)) {
1046
- op = (op & ~MO_AMASK) | MO_ALIGN;
1047
- }
1048
-
1049
- switch (op & MO_SIZE) {
1050
- case MO_8:
1051
- op &= ~MO_BSWAP;
1052
- break;
1053
- case MO_16:
1054
- break;
1055
- case MO_32:
1056
- if (!is64) {
1057
- op &= ~MO_SIGN;
1058
- }
1059
- break;
1060
- case MO_64:
1061
- if (is64) {
1062
- op &= ~MO_SIGN;
1063
- break;
1064
- }
1065
- /* fall through */
1066
- default:
1067
- g_assert_not_reached();
1068
- }
1069
- if (st) {
1070
- op &= ~MO_SIGN;
1071
- }
1072
- return op;
1073
-}
1074
-
1075
-static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
1076
- MemOp memop, TCGArg idx)
1077
-{
1078
- MemOpIdx oi = make_memop_idx(memop, idx);
1079
-#if TARGET_LONG_BITS == 32
1080
- tcg_gen_op3i_i32(opc, val, addr, oi);
1081
-#else
1082
- if (TCG_TARGET_REG_BITS == 32) {
1083
- tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
1084
- } else {
1085
- tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
1086
- }
1087
-#endif
1088
-}
1089
-
1090
-static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
1091
- MemOp memop, TCGArg idx)
1092
-{
1093
- MemOpIdx oi = make_memop_idx(memop, idx);
1094
-#if TARGET_LONG_BITS == 32
1095
- if (TCG_TARGET_REG_BITS == 32) {
1096
- tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
1097
- } else {
1098
- tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
1099
- }
1100
-#else
1101
- if (TCG_TARGET_REG_BITS == 32) {
1102
- tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
1103
- TCGV_LOW(addr), TCGV_HIGH(addr), oi);
1104
- } else {
1105
- tcg_gen_op3i_i64(opc, val, addr, oi);
1106
- }
1107
-#endif
1108
-}
1109
-
1110
-static void tcg_gen_req_mo(TCGBar type)
1111
-{
1112
-#ifdef TCG_GUEST_DEFAULT_MO
1113
- type &= TCG_GUEST_DEFAULT_MO;
1114
-#endif
1115
- type &= ~TCG_TARGET_DEFAULT_MO;
1116
- if (type) {
1117
- tcg_gen_mb(type | TCG_BAR_SC);
1118
- }
1119
-}
1120
-
1121
-static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
1122
-{
1123
-#ifdef CONFIG_PLUGIN
1124
- if (tcg_ctx->plugin_insn != NULL) {
1125
- /* Save a copy of the vaddr for use after a load. */
1126
- TCGv temp = tcg_temp_new();
1127
- tcg_gen_mov_tl(temp, vaddr);
1128
- return temp;
1129
- }
1130
-#endif
1131
- return vaddr;
1132
-}
1133
-
1134
-static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
1135
- enum qemu_plugin_mem_rw rw)
1136
-{
1137
-#ifdef CONFIG_PLUGIN
1138
- if (tcg_ctx->plugin_insn != NULL) {
1139
- qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
1140
- plugin_gen_empty_mem_callback(vaddr, info);
1141
- tcg_temp_free(vaddr);
1142
- }
1143
-#endif
1144
-}
1145
-
1146
-void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
1147
-{
1148
- MemOp orig_memop;
1149
- MemOpIdx oi;
1150
-
1151
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1152
- memop = tcg_canonicalize_memop(memop, 0, 0);
1153
- oi = make_memop_idx(memop, idx);
1154
-
1155
- orig_memop = memop;
1156
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1157
- memop &= ~MO_BSWAP;
1158
- /* The bswap primitive benefits from zero-extended input. */
1159
- if ((memop & MO_SSIZE) == MO_SW) {
1160
- memop &= ~MO_SIGN;
1161
- }
1162
- }
1163
-
1164
- addr = plugin_prep_mem_callbacks(addr);
1165
- gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
1166
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
1167
-
1168
- if ((orig_memop ^ memop) & MO_BSWAP) {
1169
- switch (orig_memop & MO_SIZE) {
1170
- case MO_16:
1171
- tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
1172
- ? TCG_BSWAP_IZ | TCG_BSWAP_OS
1173
- : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
1174
- break;
1175
- case MO_32:
1176
- tcg_gen_bswap32_i32(val, val);
1177
- break;
1178
- default:
1179
- g_assert_not_reached();
1180
- }
1181
- }
1182
-}
1183
-
1184
-void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
1185
-{
1186
- TCGv_i32 swap = NULL;
1187
- MemOpIdx oi;
1188
-
1189
- tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1190
- memop = tcg_canonicalize_memop(memop, 0, 1);
1191
- oi = make_memop_idx(memop, idx);
1192
-
1193
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1194
- swap = tcg_temp_ebb_new_i32();
1195
- switch (memop & MO_SIZE) {
1196
- case MO_16:
1197
- tcg_gen_bswap16_i32(swap, val, 0);
1198
- break;
1199
- case MO_32:
1200
- tcg_gen_bswap32_i32(swap, val);
1201
- break;
1202
- default:
1203
- g_assert_not_reached();
1204
- }
1205
- val = swap;
1206
- memop &= ~MO_BSWAP;
1207
- }
1208
-
1209
- addr = plugin_prep_mem_callbacks(addr);
1210
- if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
1211
- gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
1212
- } else {
1213
- gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
1214
- }
1215
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
1216
-
1217
- if (swap) {
1218
- tcg_temp_free_i32(swap);
1219
- }
1220
-}
1221
-
1222
-void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
1223
-{
1224
- MemOp orig_memop;
1225
- MemOpIdx oi;
1226
-
1227
- if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
1228
- tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
1229
- if (memop & MO_SIGN) {
1230
- tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
1231
- } else {
1232
- tcg_gen_movi_i32(TCGV_HIGH(val), 0);
1233
- }
1234
- return;
1235
- }
1236
-
1237
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1238
- memop = tcg_canonicalize_memop(memop, 1, 0);
1239
- oi = make_memop_idx(memop, idx);
1240
-
1241
- orig_memop = memop;
1242
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1243
- memop &= ~MO_BSWAP;
1244
- /* The bswap primitive benefits from zero-extended input. */
1245
- if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
1246
- memop &= ~MO_SIGN;
1247
- }
1248
- }
1249
-
1250
- addr = plugin_prep_mem_callbacks(addr);
1251
- gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
1252
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
1253
-
1254
- if ((orig_memop ^ memop) & MO_BSWAP) {
1255
- int flags = (orig_memop & MO_SIGN
1256
- ? TCG_BSWAP_IZ | TCG_BSWAP_OS
1257
- : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
1258
- switch (orig_memop & MO_SIZE) {
1259
- case MO_16:
1260
- tcg_gen_bswap16_i64(val, val, flags);
1261
- break;
1262
- case MO_32:
1263
- tcg_gen_bswap32_i64(val, val, flags);
1264
- break;
1265
- case MO_64:
1266
- tcg_gen_bswap64_i64(val, val);
1267
- break;
1268
- default:
1269
- g_assert_not_reached();
1270
- }
1271
- }
1272
-}
1273
-
1274
-void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
1275
-{
1276
- TCGv_i64 swap = NULL;
1277
- MemOpIdx oi;
1278
-
1279
- if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
1280
- tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
1281
- return;
1282
- }
1283
-
1284
- tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1285
- memop = tcg_canonicalize_memop(memop, 1, 1);
1286
- oi = make_memop_idx(memop, idx);
1287
-
1288
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1289
- swap = tcg_temp_ebb_new_i64();
1290
- switch (memop & MO_SIZE) {
1291
- case MO_16:
1292
- tcg_gen_bswap16_i64(swap, val, 0);
1293
- break;
1294
- case MO_32:
1295
- tcg_gen_bswap32_i64(swap, val, 0);
1296
- break;
1297
- case MO_64:
1298
- tcg_gen_bswap64_i64(swap, val);
1299
- break;
1300
- default:
1301
- g_assert_not_reached();
1302
- }
1303
- val = swap;
1304
- memop &= ~MO_BSWAP;
1305
- }
1306
-
1307
- addr = plugin_prep_mem_callbacks(addr);
1308
- gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
1309
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
1310
-
1311
- if (swap) {
1312
- tcg_temp_free_i64(swap);
1313
- }
1314
-}
1315
-
1316
-/*
1317
- * Return true if @mop, without knowledge of the pointer alignment,
1318
- * does not require 16-byte atomicity, and it would be adventagous
1319
- * to avoid a call to a helper function.
1320
- */
1321
-static bool use_two_i64_for_i128(MemOp mop)
1322
-{
1323
-#ifdef CONFIG_SOFTMMU
1324
- /* Two softmmu tlb lookups is larger than one function call. */
1325
- return false;
1326
-#else
1327
- /*
1328
- * For user-only, two 64-bit operations may well be smaller than a call.
1329
- * Determine if that would be legal for the requested atomicity.
1330
- */
1331
- switch (mop & MO_ATOM_MASK) {
1332
- case MO_ATOM_NONE:
1333
- case MO_ATOM_IFALIGN_PAIR:
1334
- return true;
1335
- case MO_ATOM_IFALIGN:
1336
- case MO_ATOM_SUBALIGN:
1337
- case MO_ATOM_WITHIN16:
1338
- case MO_ATOM_WITHIN16_PAIR:
1339
- /* In a serialized context, no atomicity is required. */
1340
- return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
1341
- default:
1342
- g_assert_not_reached();
1343
- }
1344
-#endif
1345
-}
1346
-
1347
-static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
1348
-{
1349
- MemOp mop_1 = orig, mop_2;
1350
-
1351
- tcg_debug_assert((orig & MO_SIZE) == MO_128);
1352
- tcg_debug_assert((orig & MO_SIGN) == 0);
1353
-
1354
- /* Reduce the size to 64-bit. */
1355
- mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
1356
-
1357
- /* Retain the alignment constraints of the original. */
1358
- switch (orig & MO_AMASK) {
1359
- case MO_UNALN:
1360
- case MO_ALIGN_2:
1361
- case MO_ALIGN_4:
1362
- mop_2 = mop_1;
1363
- break;
1364
- case MO_ALIGN_8:
1365
- /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
1366
- mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
1367
- mop_2 = mop_1;
1368
- break;
1369
- case MO_ALIGN:
1370
- /* Second has 8-byte alignment; first has 16-byte alignment. */
1371
- mop_2 = mop_1;
1372
- mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
1373
- break;
1374
- case MO_ALIGN_16:
1375
- case MO_ALIGN_32:
1376
- case MO_ALIGN_64:
1377
- /* Second has 8-byte alignment; first retains original. */
1378
- mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
1379
- break;
1380
- default:
1381
- g_assert_not_reached();
1382
- }
1383
-
1384
- /* Use a memory ordering implemented by the host. */
1385
- if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
1386
- mop_1 &= ~MO_BSWAP;
1387
- mop_2 &= ~MO_BSWAP;
1388
- }
1389
-
1390
- ret[0] = mop_1;
1391
- ret[1] = mop_2;
1392
-}
1393
-
1394
-#if TARGET_LONG_BITS == 64
1395
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
1396
-#else
1397
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
1398
-#endif
1399
-
1400
-void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
1401
-{
1402
- const MemOpIdx oi = make_memop_idx(memop, idx);
1403
-
1404
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1405
- tcg_debug_assert((memop & MO_SIGN) == 0);
1406
-
1407
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1408
- addr = plugin_prep_mem_callbacks(addr);
1409
-
1410
- /* TODO: For now, force 32-bit hosts to use the helper. */
1411
- if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
1412
- TCGv_i64 lo, hi;
1413
- TCGArg addr_arg;
1414
- MemOpIdx adj_oi;
1415
- bool need_bswap = false;
1416
-
1417
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1418
- lo = TCGV128_HIGH(val);
1419
- hi = TCGV128_LOW(val);
1420
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
1421
- need_bswap = true;
1422
- } else {
1423
- lo = TCGV128_LOW(val);
1424
- hi = TCGV128_HIGH(val);
1425
- adj_oi = oi;
1426
- }
1427
-
1428
-#if TARGET_LONG_BITS == 32
1429
- addr_arg = tcgv_i32_arg(addr);
1430
-#else
1431
- addr_arg = tcgv_i64_arg(addr);
1432
-#endif
1433
- tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
1434
-
1435
- if (need_bswap) {
1436
- tcg_gen_bswap64_i64(lo, lo);
1437
- tcg_gen_bswap64_i64(hi, hi);
1438
- }
1439
- } else if (use_two_i64_for_i128(memop)) {
1440
- MemOp mop[2];
1441
- TCGv addr_p8;
1442
- TCGv_i64 x, y;
1443
-
1444
- canonicalize_memop_i128_as_i64(mop, memop);
1445
-
1446
- /*
1447
- * Since there are no global TCGv_i128, there is no visible state
1448
- * changed if the second load faults. Load directly into the two
1449
- * subwords.
1450
- */
1451
- if ((memop & MO_BSWAP) == MO_LE) {
1452
- x = TCGV128_LOW(val);
1453
- y = TCGV128_HIGH(val);
1454
- } else {
1455
- x = TCGV128_HIGH(val);
1456
- y = TCGV128_LOW(val);
1457
- }
1458
-
1459
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
1460
-
1461
- if ((mop[0] ^ memop) & MO_BSWAP) {
1462
- tcg_gen_bswap64_i64(x, x);
1463
- }
1464
-
1465
- addr_p8 = tcg_temp_ebb_new();
1466
- tcg_gen_addi_tl(addr_p8, addr, 8);
1467
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
1468
- tcg_temp_free(addr_p8);
1469
-
1470
- if ((mop[0] ^ memop) & MO_BSWAP) {
1471
- tcg_gen_bswap64_i64(y, y);
1472
- }
1473
- } else {
1474
- gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
1475
- }
1476
-
1477
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
1478
-}
1479
-
1480
-void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
1481
-{
1482
- const MemOpIdx oi = make_memop_idx(memop, idx);
1483
-
1484
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1485
- tcg_debug_assert((memop & MO_SIGN) == 0);
1486
-
1487
- tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
1488
- addr = plugin_prep_mem_callbacks(addr);
1489
-
1490
- /* TODO: For now, force 32-bit hosts to use the helper. */
1491
-
1492
- if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
1493
- TCGv_i64 lo, hi;
1494
- TCGArg addr_arg;
1495
- MemOpIdx adj_oi;
1496
- bool need_bswap = false;
1497
-
1498
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1499
- lo = tcg_temp_new_i64();
1500
- hi = tcg_temp_new_i64();
1501
- tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
1502
- tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
1503
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
1504
- need_bswap = true;
1505
- } else {
1506
- lo = TCGV128_LOW(val);
1507
- hi = TCGV128_HIGH(val);
1508
- adj_oi = oi;
1509
- }
1510
-
1511
-#if TARGET_LONG_BITS == 32
1512
- addr_arg = tcgv_i32_arg(addr);
1513
-#else
1514
- addr_arg = tcgv_i64_arg(addr);
1515
-#endif
1516
- tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
1517
-
1518
- if (need_bswap) {
1519
- tcg_temp_free_i64(lo);
1520
- tcg_temp_free_i64(hi);
1521
- }
1522
- } else if (use_two_i64_for_i128(memop)) {
1523
- MemOp mop[2];
1524
- TCGv addr_p8;
1525
- TCGv_i64 x, y;
1526
-
1527
- canonicalize_memop_i128_as_i64(mop, memop);
1528
-
1529
- if ((memop & MO_BSWAP) == MO_LE) {
1530
- x = TCGV128_LOW(val);
1531
- y = TCGV128_HIGH(val);
1532
- } else {
1533
- x = TCGV128_HIGH(val);
1534
- y = TCGV128_LOW(val);
1535
- }
1536
-
1537
- addr_p8 = tcg_temp_ebb_new();
1538
- if ((mop[0] ^ memop) & MO_BSWAP) {
1539
- TCGv_i64 t = tcg_temp_ebb_new_i64();
1540
-
1541
- tcg_gen_bswap64_i64(t, x);
1542
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
1543
- tcg_gen_bswap64_i64(t, y);
1544
- tcg_gen_addi_tl(addr_p8, addr, 8);
1545
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
1546
- tcg_temp_free_i64(t);
1547
- } else {
1548
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
1549
- tcg_gen_addi_tl(addr_p8, addr, 8);
1550
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
1551
- }
1552
- tcg_temp_free(addr_p8);
1553
- } else {
1554
- gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
1555
- }
1556
-
1557
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
1558
-}
1559
-
1560
-static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
1561
-{
1562
- switch (opc & MO_SSIZE) {
1563
- case MO_SB:
1564
- tcg_gen_ext8s_i32(ret, val);
1565
- break;
1566
- case MO_UB:
1567
- tcg_gen_ext8u_i32(ret, val);
1568
- break;
1569
- case MO_SW:
1570
- tcg_gen_ext16s_i32(ret, val);
1571
- break;
1572
- case MO_UW:
1573
- tcg_gen_ext16u_i32(ret, val);
1574
- break;
1575
- default:
1576
- tcg_gen_mov_i32(ret, val);
1577
- break;
1578
- }
1579
-}
1580
-
1581
-static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
1582
-{
1583
- switch (opc & MO_SSIZE) {
1584
- case MO_SB:
1585
- tcg_gen_ext8s_i64(ret, val);
1586
- break;
1587
- case MO_UB:
1588
- tcg_gen_ext8u_i64(ret, val);
1589
- break;
1590
- case MO_SW:
1591
- tcg_gen_ext16s_i64(ret, val);
1592
- break;
1593
- case MO_UW:
1594
- tcg_gen_ext16u_i64(ret, val);
1595
- break;
1596
- case MO_SL:
1597
- tcg_gen_ext32s_i64(ret, val);
1598
- break;
1599
- case MO_UL:
1600
- tcg_gen_ext32u_i64(ret, val);
1601
- break;
1602
- default:
1603
- tcg_gen_mov_i64(ret, val);
1604
- break;
1605
- }
1606
-}
1607
-
1608
-typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
1609
- TCGv_i32, TCGv_i32, TCGv_i32);
1610
-typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
1611
- TCGv_i64, TCGv_i64, TCGv_i32);
1612
-typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
1613
- TCGv_i128, TCGv_i128, TCGv_i32);
1614
-typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
1615
- TCGv_i32, TCGv_i32);
1616
-typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
1617
- TCGv_i64, TCGv_i32);
1618
-
1619
-#ifdef CONFIG_ATOMIC64
1620
-# define WITH_ATOMIC64(X) X,
1621
-#else
1622
-# define WITH_ATOMIC64(X)
1623
-#endif
1624
-#ifdef CONFIG_CMPXCHG128
1625
-# define WITH_ATOMIC128(X) X,
1626
-#else
1627
-# define WITH_ATOMIC128(X)
1628
-#endif
1629
-
1630
-static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
1631
- [MO_8] = gen_helper_atomic_cmpxchgb,
1632
- [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
1633
- [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
1634
- [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
1635
- [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
1636
- WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
1637
- WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
1638
- WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
1639
- WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
1640
-};
1641
-
1642
-void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
1643
- TCGv_i32 newv, TCGArg idx, MemOp memop)
1644
-{
1645
- TCGv_i32 t1 = tcg_temp_ebb_new_i32();
1646
- TCGv_i32 t2 = tcg_temp_ebb_new_i32();
1647
-
1648
- tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
1649
-
1650
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
1651
- tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
1652
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
1653
- tcg_temp_free_i32(t2);
1654
-
1655
- if (memop & MO_SIGN) {
1656
- tcg_gen_ext_i32(retv, t1, memop);
1657
- } else {
1658
- tcg_gen_mov_i32(retv, t1);
1659
- }
1660
- tcg_temp_free_i32(t1);
1661
-}
1662
-
1663
-void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
1664
- TCGv_i32 newv, TCGArg idx, MemOp memop)
1665
-{
1666
- gen_atomic_cx_i32 gen;
1667
- MemOpIdx oi;
1668
-
1669
- if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1670
- tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
1671
- return;
1672
- }
1673
-
1674
- memop = tcg_canonicalize_memop(memop, 0, 0);
1675
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1676
- tcg_debug_assert(gen != NULL);
1677
-
1678
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
1679
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1680
-
1681
- if (memop & MO_SIGN) {
1682
- tcg_gen_ext_i32(retv, retv, memop);
1683
- }
1684
-}
1685
-
1686
-void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
1687
- TCGv_i64 newv, TCGArg idx, MemOp memop)
1688
-{
1689
- TCGv_i64 t1, t2;
1690
-
1691
- if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
1692
- tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
1693
- TCGV_LOW(newv), idx, memop);
1694
- if (memop & MO_SIGN) {
1695
- tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
1696
- } else {
1697
- tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
1698
- }
1699
- return;
1700
- }
1701
-
1702
- t1 = tcg_temp_ebb_new_i64();
1703
- t2 = tcg_temp_ebb_new_i64();
1704
-
1705
- tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
1706
-
1707
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
1708
- tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
1709
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
1710
- tcg_temp_free_i64(t2);
1711
-
1712
- if (memop & MO_SIGN) {
1713
- tcg_gen_ext_i64(retv, t1, memop);
1714
- } else {
1715
- tcg_gen_mov_i64(retv, t1);
1716
- }
1717
- tcg_temp_free_i64(t1);
1718
-}
1719
-
1720
-void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
1721
- TCGv_i64 newv, TCGArg idx, MemOp memop)
1722
-{
1723
- if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1724
- tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
1725
- return;
1726
- }
1727
-
1728
- if ((memop & MO_SIZE) == MO_64) {
1729
- gen_atomic_cx_i64 gen;
1730
-
1731
- memop = tcg_canonicalize_memop(memop, 1, 0);
1732
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1733
- if (gen) {
1734
- MemOpIdx oi = make_memop_idx(memop, idx);
1735
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1736
- return;
1737
- }
1738
-
1739
- gen_helper_exit_atomic(cpu_env);
1740
-
1741
- /*
1742
- * Produce a result for a well-formed opcode stream. This satisfies
1743
- * liveness for set before used, which happens before this dead code
1744
- * is removed.
1745
- */
1746
- tcg_gen_movi_i64(retv, 0);
1747
- return;
1748
- }
1749
-
1750
- if (TCG_TARGET_REG_BITS == 32) {
1751
- tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
1752
- TCGV_LOW(newv), idx, memop);
1753
- if (memop & MO_SIGN) {
1754
- tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
1755
- } else {
1756
- tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
1757
- }
1758
- } else {
1759
- TCGv_i32 c32 = tcg_temp_ebb_new_i32();
1760
- TCGv_i32 n32 = tcg_temp_ebb_new_i32();
1761
- TCGv_i32 r32 = tcg_temp_ebb_new_i32();
1762
-
1763
- tcg_gen_extrl_i64_i32(c32, cmpv);
1764
- tcg_gen_extrl_i64_i32(n32, newv);
1765
- tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
1766
- tcg_temp_free_i32(c32);
1767
- tcg_temp_free_i32(n32);
1768
-
1769
- tcg_gen_extu_i32_i64(retv, r32);
1770
- tcg_temp_free_i32(r32);
1771
-
1772
- if (memop & MO_SIGN) {
1773
- tcg_gen_ext_i64(retv, retv, memop);
1774
- }
1775
- }
1776
-}
1777
-
1778
-void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
1779
- TCGv_i128 newv, TCGArg idx, MemOp memop)
1780
-{
1781
- if (TCG_TARGET_REG_BITS == 32) {
1782
- /* Inline expansion below is simply too large for 32-bit hosts. */
1783
- gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
1784
- ? gen_helper_nonatomic_cmpxchgo_le
1785
- : gen_helper_nonatomic_cmpxchgo_be);
1786
- MemOpIdx oi = make_memop_idx(memop, idx);
1787
-
1788
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1789
- tcg_debug_assert((memop & MO_SIGN) == 0);
1790
-
1791
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1792
- } else {
1793
- TCGv_i128 oldv = tcg_temp_ebb_new_i128();
1794
- TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
1795
- TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1796
- TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1797
- TCGv_i64 z = tcg_constant_i64(0);
1798
-
1799
- tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
1800
-
1801
- /* Compare i128 */
1802
- tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
1803
- tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
1804
- tcg_gen_or_i64(t0, t0, t1);
1805
-
1806
- /* tmpv = equal ? newv : oldv */
1807
- tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
1808
- TCGV128_LOW(newv), TCGV128_LOW(oldv));
1809
- tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
1810
- TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
1811
-
1812
- /* Unconditional writeback. */
1813
- tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
1814
- tcg_gen_mov_i128(retv, oldv);
1815
-
1816
- tcg_temp_free_i64(t0);
1817
- tcg_temp_free_i64(t1);
1818
- tcg_temp_free_i128(tmpv);
1819
- tcg_temp_free_i128(oldv);
1820
- }
1821
-}
1822
-
1823
-void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
1824
- TCGv_i128 newv, TCGArg idx, MemOp memop)
1825
-{
1826
- gen_atomic_cx_i128 gen;
1827
-
1828
- if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1829
- tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
1830
- return;
1831
- }
1832
-
1833
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1834
- tcg_debug_assert((memop & MO_SIGN) == 0);
1835
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1836
-
1837
- if (gen) {
1838
- MemOpIdx oi = make_memop_idx(memop, idx);
1839
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1840
- return;
1841
- }
1842
-
1843
- gen_helper_exit_atomic(cpu_env);
1844
-
1845
- /*
1846
- * Produce a result for a well-formed opcode stream. This satisfies
1847
- * liveness for set before used, which happens before this dead code
1848
- * is removed.
1849
- */
1850
- tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
1851
- tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
1852
-}
1853
-
1854
-static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
1855
- TCGArg idx, MemOp memop, bool new_val,
1856
- void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
1857
-{
1858
- TCGv_i32 t1 = tcg_temp_ebb_new_i32();
1859
- TCGv_i32 t2 = tcg_temp_ebb_new_i32();
1860
-
1861
- memop = tcg_canonicalize_memop(memop, 0, 0);
1862
-
1863
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
1864
- tcg_gen_ext_i32(t2, val, memop);
1865
- gen(t2, t1, t2);
1866
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
1867
-
1868
- tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
1869
- tcg_temp_free_i32(t1);
1870
- tcg_temp_free_i32(t2);
1871
-}
1872
-
1873
-static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
1874
- TCGArg idx, MemOp memop, void * const table[])
1875
-{
1876
- gen_atomic_op_i32 gen;
1877
- MemOpIdx oi;
1878
-
1879
- memop = tcg_canonicalize_memop(memop, 0, 0);
1880
-
1881
- gen = table[memop & (MO_SIZE | MO_BSWAP)];
1882
- tcg_debug_assert(gen != NULL);
1883
-
1884
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
1885
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
1886
-
1887
- if (memop & MO_SIGN) {
1888
- tcg_gen_ext_i32(ret, ret, memop);
1889
- }
1890
-}
1891
-
1892
-static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
1893
- TCGArg idx, MemOp memop, bool new_val,
1894
- void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
1895
-{
1896
- TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1897
- TCGv_i64 t2 = tcg_temp_ebb_new_i64();
1898
-
1899
- memop = tcg_canonicalize_memop(memop, 1, 0);
1900
-
1901
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
1902
- tcg_gen_ext_i64(t2, val, memop);
1903
- gen(t2, t1, t2);
1904
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
1905
-
1906
- tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
1907
- tcg_temp_free_i64(t1);
1908
- tcg_temp_free_i64(t2);
1909
-}
1910
-
1911
-static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
1912
- TCGArg idx, MemOp memop, void * const table[])
1913
-{
1914
- memop = tcg_canonicalize_memop(memop, 1, 0);
1915
-
1916
- if ((memop & MO_SIZE) == MO_64) {
1917
-#ifdef CONFIG_ATOMIC64
1918
- gen_atomic_op_i64 gen;
1919
- MemOpIdx oi;
1920
-
1921
- gen = table[memop & (MO_SIZE | MO_BSWAP)];
1922
- tcg_debug_assert(gen != NULL);
1923
-
1924
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
1925
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
1926
-#else
1927
- gen_helper_exit_atomic(cpu_env);
1928
- /* Produce a result, so that we have a well-formed opcode stream
1929
- with respect to uses of the result in the (dead) code following. */
1930
- tcg_gen_movi_i64(ret, 0);
1931
-#endif /* CONFIG_ATOMIC64 */
1932
- } else {
1933
- TCGv_i32 v32 = tcg_temp_ebb_new_i32();
1934
- TCGv_i32 r32 = tcg_temp_ebb_new_i32();
1935
-
1936
- tcg_gen_extrl_i64_i32(v32, val);
1937
- do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
1938
- tcg_temp_free_i32(v32);
1939
-
1940
- tcg_gen_extu_i32_i64(ret, r32);
1941
- tcg_temp_free_i32(r32);
1942
-
1943
- if (memop & MO_SIGN) {
1944
- tcg_gen_ext_i64(ret, ret, memop);
1945
- }
1946
- }
1947
-}
1948
-
1949
-#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
1950
-static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
1951
- [MO_8] = gen_helper_atomic_##NAME##b, \
1952
- [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
1953
- [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
1954
- [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \
1955
- [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \
1956
- WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
1957
- WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
1958
-}; \
1959
-void tcg_gen_atomic_##NAME##_i32 \
1960
- (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
1961
-{ \
1962
- if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
1963
- do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
1964
- } else { \
1965
- do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \
1966
- tcg_gen_##OP##_i32); \
1967
- } \
1968
-} \
1969
-void tcg_gen_atomic_##NAME##_i64 \
1970
- (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
1971
-{ \
1972
- if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
1973
- do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
1974
- } else { \
1975
- do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \
1976
- tcg_gen_##OP##_i64); \
1977
- } \
1978
-}
1979
-
1980
-GEN_ATOMIC_HELPER(fetch_add, add, 0)
1981
-GEN_ATOMIC_HELPER(fetch_and, and, 0)
1982
-GEN_ATOMIC_HELPER(fetch_or, or, 0)
1983
-GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
1984
-GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
1985
-GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
1986
-GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
1987
-GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
1988
-
1989
-GEN_ATOMIC_HELPER(add_fetch, add, 1)
1990
-GEN_ATOMIC_HELPER(and_fetch, and, 1)
1991
-GEN_ATOMIC_HELPER(or_fetch, or, 1)
1992
-GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
1993
-GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
1994
-GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
1995
-GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
1996
-GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
1997
-
1998
-static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
1999
-{
2000
- tcg_gen_mov_i32(r, b);
2001
-}
2002
-
2003
-static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
2004
-{
2005
- tcg_gen_mov_i64(r, b);
2006
-}
2007
-
2008
-GEN_ATOMIC_HELPER(xchg, mov2, 0)
2009
-
2010
-#undef GEN_ATOMIC_HELPER
2011
diff --git a/tcg/meson.build b/tcg/meson.build
2012
index XXXXXXX..XXXXXXX 100644
2013
--- a/tcg/meson.build
2014
+++ b/tcg/meson.build
2015
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(files(
2016
'tcg.c',
2017
'tcg-common.c',
2018
'tcg-op.c',
2019
+ 'tcg-op-ldst.c',
2020
'tcg-op-gvec.c',
2021
'tcg-op-vec.c',
))
--
2.34.1

We already pass uint64_t to restore_state_to_opc; this changes all
of the other uses from insn_start through the encoding to decoding.
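[Not part of the patch: an illustrative, self-contained sketch of the signed
LEB128 round-trip that the search data relies on, written with the int64_t
width adopted here so that 64-bit insn_start words survive on 32-bit hosts.
The function and variable names below are invented for the example.]

#include <assert.h>
#include <stdint.h>

/*
 * Toy signed LEB128 encoder/decoder carried in int64_t, mirroring the
 * width change in this patch; not the QEMU functions themselves.
 */
static uint8_t *sleb128_put(uint8_t *p, int64_t val)
{
    int more, byte;

    do {
        byte = val & 0x7f;
        val >>= 7;
        more = !((val == 0 && (byte & 0x40) == 0)
                 || (val == -1 && (byte & 0x40) != 0));
        if (more) {
            byte |= 0x80;
        }
        *p++ = byte;
    } while (more);
    return p;
}

static int64_t sleb128_get(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    int64_t val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        val |= (int64_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    if (shift < 64 && (byte & 0x40)) {
        val |= -(int64_t)1 << shift;    /* sign-extend the top bits */
    }
    *pp = p;
    return val;
}

int main(void)
{
    uint8_t buf[10];
    const uint8_t *r = buf;
    int64_t delta = -0x123456789abLL;   /* would truncate in a 32-bit type */

    sleb128_put(buf, delta);
    assert(sleb128_get(&r) == delta);
    return 0;
}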

Reviewed-by: Anton Johansson <anjo@rev.ng>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg-op.h | 39 +++++++++------------------------------
include/tcg/tcg-opc.h | 2 +-
include/tcg/tcg.h | 30 +++++++++++++++---------------
accel/tcg/translate-all.c | 28 ++++++++++++++++------------
tcg/tcg.c | 18 ++++--------------
5 files changed, 45 insertions(+), 72 deletions(-)

diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/tcg/tcg-op.h
18
+++ b/include/tcg/tcg-op.h
19
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
20
#endif
21
22
#if TARGET_INSN_START_WORDS == 1
23
-# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
24
static inline void tcg_gen_insn_start(target_ulong pc)
25
{
26
- tcg_gen_op1(INDEX_op_insn_start, pc);
27
+ TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 64 / TCG_TARGET_REG_BITS);
28
+ tcg_set_insn_start_param(op, 0, pc);
29
}
30
-# else
31
-static inline void tcg_gen_insn_start(target_ulong pc)
32
-{
33
- tcg_gen_op2(INDEX_op_insn_start, (uint32_t)pc, (uint32_t)(pc >> 32));
34
-}
35
-# endif
36
#elif TARGET_INSN_START_WORDS == 2
37
-# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
38
static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
39
{
40
- tcg_gen_op2(INDEX_op_insn_start, pc, a1);
41
+ TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 2 * 64 / TCG_TARGET_REG_BITS);
42
+ tcg_set_insn_start_param(op, 0, pc);
43
+ tcg_set_insn_start_param(op, 1, a1);
44
}
45
-# else
46
-static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
47
-{
48
- tcg_gen_op4(INDEX_op_insn_start,
49
- (uint32_t)pc, (uint32_t)(pc >> 32),
50
- (uint32_t)a1, (uint32_t)(a1 >> 32));
51
-}
52
-# endif
53
#elif TARGET_INSN_START_WORDS == 3
54
-# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
55
static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
56
target_ulong a2)
57
{
58
- tcg_gen_op3(INDEX_op_insn_start, pc, a1, a2);
59
+ TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 3 * 64 / TCG_TARGET_REG_BITS);
60
+ tcg_set_insn_start_param(op, 0, pc);
61
+ tcg_set_insn_start_param(op, 1, a1);
62
+ tcg_set_insn_start_param(op, 2, a2);
63
}
64
-# else
65
-static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
66
- target_ulong a2)
67
-{
68
- tcg_gen_op6(INDEX_op_insn_start,
69
- (uint32_t)pc, (uint32_t)(pc >> 32),
70
- (uint32_t)a1, (uint32_t)(a1 >> 32),
71
- (uint32_t)a2, (uint32_t)(a2 >> 32));
72
-}
73
-# endif
74
#else
75
# error "Unhandled number of operands to insn_start"
76
#endif
77
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
78
index XXXXXXX..XXXXXXX 100644
79
--- a/include/tcg/tcg-opc.h
80
+++ b/include/tcg/tcg-opc.h
81
@@ -XXX,XX +XXX,XX @@ DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
82
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
83
84
/* QEMU specific */
85
-DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
86
+DEF(insn_start, 0, 0, DATA64_ARGS * TARGET_INSN_START_WORDS,
87
TCG_OPF_NOT_PRESENT)
88
DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
89
DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
90
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
91
index XXXXXXX..XXXXXXX 100644
92
--- a/include/tcg/tcg.h
93
+++ b/include/tcg/tcg.h
94
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
95
TCGTemp *reg_to_temp[TCG_TARGET_NB_REGS];
96
97
uint16_t gen_insn_end_off[TCG_MAX_INSNS];
98
- target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
99
+ uint64_t gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
100
101
/* Exit to translator on overflow. */
102
sigjmp_buf jmp_trans;
103
@@ -XXX,XX +XXX,XX @@ static inline void tcg_set_insn_param(TCGOp *op, int arg, TCGArg v)
104
op->args[arg] = v;
105
}
106
107
-static inline target_ulong tcg_get_insn_start_param(TCGOp *op, int arg)
108
+static inline uint64_t tcg_get_insn_start_param(TCGOp *op, int arg)
109
{
110
-#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
111
- return tcg_get_insn_param(op, arg);
112
-#else
113
- return tcg_get_insn_param(op, arg * 2) |
114
- ((uint64_t)tcg_get_insn_param(op, arg * 2 + 1) << 32);
115
-#endif
116
+ if (TCG_TARGET_REG_BITS == 64) {
117
+ return tcg_get_insn_param(op, arg);
118
+ } else {
119
+ return deposit64(tcg_get_insn_param(op, arg * 2), 32, 32,
120
+ tcg_get_insn_param(op, arg * 2 + 1));
121
+ }
122
}
123
124
-static inline void tcg_set_insn_start_param(TCGOp *op, int arg, target_ulong v)
125
+static inline void tcg_set_insn_start_param(TCGOp *op, int arg, uint64_t v)
126
{
127
-#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
128
- tcg_set_insn_param(op, arg, v);
129
-#else
130
- tcg_set_insn_param(op, arg * 2, v);
131
- tcg_set_insn_param(op, arg * 2 + 1, v >> 32);
132
-#endif
133
+ if (TCG_TARGET_REG_BITS == 64) {
134
+ tcg_set_insn_param(op, arg, v);
135
+ } else {
136
+ tcg_set_insn_param(op, arg * 2, v);
137
+ tcg_set_insn_param(op, arg * 2 + 1, v >> 32);
138
+ }
139
}
140
141
/* The last op that was emitted. */
142
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/accel/tcg/translate-all.c
145
+++ b/accel/tcg/translate-all.c
146
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
147
148
TBContext tb_ctx;
149
150
-/* Encode VAL as a signed leb128 sequence at P.
151
- Return P incremented past the encoded value. */
152
-static uint8_t *encode_sleb128(uint8_t *p, target_long val)
153
+/*
154
+ * Encode VAL as a signed leb128 sequence at P.
155
+ * Return P incremented past the encoded value.
156
+ */
157
+static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
158
{
159
int more, byte;
160
161
@@ -XXX,XX +XXX,XX @@ static uint8_t *encode_sleb128(uint8_t *p, target_long val)
162
return p;
163
}
164
165
-/* Decode a signed leb128 sequence at *PP; increment *PP past the
166
- decoded value. Return the decoded value. */
167
-static target_long decode_sleb128(const uint8_t **pp)
168
+/*
169
+ * Decode a signed leb128 sequence at *PP; increment *PP past the
170
+ * decoded value. Return the decoded value.
171
+ */
172
+static int64_t decode_sleb128(const uint8_t **pp)
173
{
174
const uint8_t *p = *pp;
175
- target_long val = 0;
176
+ int64_t val = 0;
177
int byte, shift = 0;
178
179
do {
180
byte = *p++;
181
- val |= (target_ulong)(byte & 0x7f) << shift;
182
+ val |= (int64_t)(byte & 0x7f) << shift;
183
shift += 7;
184
} while (byte & 0x80);
185
if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
186
- val |= -(target_ulong)1 << shift;
187
+ val |= -(int64_t)1 << shift;
188
}
189
190
*pp = p;
191
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
192
int i, j, n;
193
194
for (i = 0, n = tb->icount; i < n; ++i) {
195
- target_ulong prev;
196
+ uint64_t prev;
197
198
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
199
if (i == 0) {
200
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
201
/* Dump header and the first instruction */
202
fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
203
fprintf(logfile,
204
- " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
205
+ " -- guest addr 0x%016" PRIx64 " + tb prologue\n",
206
tcg_ctx->gen_insn_data[insn][0]);
207
chunk_start = tcg_ctx->gen_insn_end_off[insn];
208
disas(logfile, tb->tc.ptr, chunk_start);
209
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
210
while (insn < tb->icount) {
211
size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
212
if (chunk_end > chunk_start) {
213
- fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n",
214
+ fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
215
tcg_ctx->gen_insn_data[insn][0]);
216
disas(logfile, tb->tc.ptr + chunk_start,
217
chunk_end - chunk_start);
218
diff --git a/tcg/tcg.c b/tcg/tcg.c
219
index XXXXXXX..XXXXXXX 100644
220
--- a/tcg/tcg.c
221
+++ b/tcg/tcg.c
222
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
223
col += ne_fprintf(f, "\n ----");
224
225
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
226
- target_ulong a;
227
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
228
- a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
229
-#else
230
- a = op->args[i];
231
-#endif
232
- col += ne_fprintf(f, " " TARGET_FMT_lx, a);
233
+ col += ne_fprintf(f, " %016" PRIx64,
234
+ tcg_get_insn_start_param(op, i));
235
}
236
} else if (c == INDEX_op_call) {
237
const TCGHelperInfo *info = tcg_call_info(op);
238
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
239
}
240
num_insns++;
241
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
242
- target_ulong a;
243
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
244
- a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
245
-#else
246
- a = op->args[i];
247
-#endif
248
- s->gen_insn_data[num_insns][i] = a;
249
+ s->gen_insn_data[num_insns][i] =
250
+ tcg_get_insn_start_param(op, i);
251
}
252
break;
253
case INDEX_op_discard:
254
--
255
2.34.1
256
257
diff view generated by jsdifflib
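[Editor's note] The patch above only widens the sleb128 value type from target_long to int64_t; the encoder body itself is not visible in the hunks. For readers unfamiliar with the encoding, here is a self-contained sketch of signed LEB128 with the widened types. It is a standalone illustration written for this note, not the QEMU sources: the encoder body is the textbook formulation, assumed rather than copied.

    #include <stdint.h>
    #include <stdio.h>

    /* Encode VAL as a signed leb128 sequence at P; return P past the output.
       An arithmetic right shift of negative values is assumed, as in the
       original code. */
    static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
    {
        int more;

        do {
            int byte = val & 0x7f;
            val >>= 7;
            /* Stop once the remaining bits are pure sign extension. */
            more = !((val == 0 && !(byte & 0x40)) ||
                     (val == -1 && (byte & 0x40)));
            *p++ = byte | (more ? 0x80 : 0);
        } while (more);
        return p;
    }

    /* Decode a signed leb128 sequence at *PP; advance *PP past it. */
    static int64_t decode_sleb128(const uint8_t **pp)
    {
        const uint8_t *p = *pp;
        int64_t val = 0;
        int byte, shift = 0;

        do {
            byte = *p++;
            val |= (int64_t)(byte & 0x7f) << shift;
            shift += 7;
        } while (byte & 0x80);
        if (shift < 64 && (byte & 0x40)) {
            val |= -(int64_t)1 << shift;
        }
        *pp = p;
        return val;
    }

    int main(void)
    {
        uint8_t buf[16];
        const uint8_t *r = buf;

        encode_sleb128(buf, -123456789);
        printf("%lld\n", (long long)decode_sleb128(&r));   /* -123456789 */
        return 0;
    }

The termination test in the encoder stops as soon as the remaining bits are pure sign extension, which is what lets small negative values such as -1 fit in a single byte.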
1
We will want to be able to flush a tlb without resizing.
1
Always pass the target address as uint64_t.
2
Adjust tcg_out_{ld,st}_helper_args to match.
2
3
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
accel/tcg/cputlb.c | 15 ++++++++++-----
7
include/tcg/tcg-ldst.h | 26 +++++++++---------
9
1 file changed, 10 insertions(+), 5 deletions(-)
8
accel/tcg/cputlb.c | 26 +++++++++---------
9
accel/tcg/user-exec.c | 26 +++++++++---------
10
tcg/tcg.c | 62 ++++++++++++++++++++++++++++++++----------
11
4 files changed, 87 insertions(+), 53 deletions(-)
10
12
13
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-ldst.h
16
+++ b/include/tcg/tcg-ldst.h
17
@@ -XXX,XX +XXX,XX @@
18
#define TCG_LDST_H
19
20
/* Value zero-extended to tcg register size. */
21
-tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
22
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
23
MemOpIdx oi, uintptr_t retaddr);
24
-tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
25
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
26
MemOpIdx oi, uintptr_t retaddr);
27
-tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
28
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
29
MemOpIdx oi, uintptr_t retaddr);
30
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
31
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
32
MemOpIdx oi, uintptr_t retaddr);
33
-Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
34
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
35
MemOpIdx oi, uintptr_t retaddr);
36
37
/* Value sign-extended to tcg register size. */
38
-tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
39
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
40
MemOpIdx oi, uintptr_t retaddr);
41
-tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
42
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
43
MemOpIdx oi, uintptr_t retaddr);
44
-tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
45
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
46
MemOpIdx oi, uintptr_t retaddr);
47
48
/*
49
* Value extended to at least uint32_t, so that some ABIs do not require
50
* zero-extension from uint8_t or uint16_t.
51
*/
52
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
53
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
54
MemOpIdx oi, uintptr_t retaddr);
55
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
56
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
57
MemOpIdx oi, uintptr_t retaddr);
58
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
59
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
60
MemOpIdx oi, uintptr_t retaddr);
61
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
62
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
63
MemOpIdx oi, uintptr_t retaddr);
64
-void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
65
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
66
MemOpIdx oi, uintptr_t retaddr);
67
68
#endif /* TCG_LDST_H */
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
69
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
index XXXXXXX..XXXXXXX 100644
70
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
71
--- a/accel/tcg/cputlb.c
14
+++ b/accel/tcg/cputlb.c
72
+++ b/accel/tcg/cputlb.c
15
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
73
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
74
return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
75
}
76
77
-tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
78
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
79
MemOpIdx oi, uintptr_t retaddr)
80
{
81
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
82
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
83
return ret;
84
}
85
86
-tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
87
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
88
MemOpIdx oi, uintptr_t retaddr)
89
{
90
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
91
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
92
return ret;
93
}
94
95
-tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
96
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
97
MemOpIdx oi, uintptr_t retaddr)
98
{
99
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
100
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
101
return ret;
102
}
103
104
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
105
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
106
MemOpIdx oi, uintptr_t retaddr)
107
{
108
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
109
@@ -XXX,XX +XXX,XX @@ uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
110
* avoid this for 64-bit data, or for 32-bit data on 32-bit host.
111
*/
112
113
-tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
114
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
115
MemOpIdx oi, uintptr_t retaddr)
116
{
117
return (int8_t)helper_ldub_mmu(env, addr, oi, retaddr);
118
}
119
120
-tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
121
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
122
MemOpIdx oi, uintptr_t retaddr)
123
{
124
return (int16_t)helper_lduw_mmu(env, addr, oi, retaddr);
125
}
126
127
-tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
128
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
129
MemOpIdx oi, uintptr_t retaddr)
130
{
131
return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
132
@@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr,
133
return ret;
134
}
135
136
-Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
137
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
138
uint32_t oi, uintptr_t retaddr)
139
{
140
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
141
@@ -XXX,XX +XXX,XX @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
16
}
142
}
17
}
143
}
18
144
19
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
145
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
20
+static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
146
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
21
{
147
MemOpIdx oi, uintptr_t ra)
22
- CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
148
{
23
- CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
149
MMULookupLocals l;
24
-
150
@@ -XXX,XX +XXX,XX @@ static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
25
- tlb_mmu_resize_locked(desc, fast);
151
do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
26
desc->n_used_entries = 0;
152
}
27
desc->large_page_addr = -1;
153
28
desc->large_page_mask = -1;
154
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
29
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
155
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
30
memset(desc->vtable, -1, sizeof(desc->vtable));
156
MemOpIdx oi, uintptr_t retaddr)
31
}
157
{
32
158
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
33
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
159
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
34
+{
160
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
35
+ CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
161
}
36
+ CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
162
163
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
164
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
165
MemOpIdx oi, uintptr_t retaddr)
166
{
167
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
168
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
169
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
170
}
171
172
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
173
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
174
MemOpIdx oi, uintptr_t retaddr)
175
{
176
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
177
@@ -XXX,XX +XXX,XX @@ static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
178
}
179
}
180
181
-void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
182
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
183
MemOpIdx oi, uintptr_t retaddr)
184
{
185
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
186
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
187
index XXXXXXX..XXXXXXX 100644
188
--- a/accel/tcg/user-exec.c
189
+++ b/accel/tcg/user-exec.c
190
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
191
return ret;
192
}
193
194
-tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
195
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
196
MemOpIdx oi, uintptr_t ra)
197
{
198
return do_ld1_mmu(env, addr, get_memop(oi), ra);
199
}
200
201
-tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
202
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
203
MemOpIdx oi, uintptr_t ra)
204
{
205
return (int8_t)do_ld1_mmu(env, addr, get_memop(oi), ra);
206
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
207
return ret;
208
}
209
210
-tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
211
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
212
MemOpIdx oi, uintptr_t ra)
213
{
214
MemOp mop = get_memop(oi);
215
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
216
return ret;
217
}
218
219
-tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
220
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
221
MemOpIdx oi, uintptr_t ra)
222
{
223
MemOp mop = get_memop(oi);
224
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
225
return ret;
226
}
227
228
-tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
229
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
230
MemOpIdx oi, uintptr_t ra)
231
{
232
MemOp mop = get_memop(oi);
233
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
234
return ret;
235
}
236
237
-tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
238
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
239
MemOpIdx oi, uintptr_t ra)
240
{
241
MemOp mop = get_memop(oi);
242
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
243
return ret;
244
}
245
246
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
247
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
248
MemOpIdx oi, uintptr_t ra)
249
{
250
MemOp mop = get_memop(oi);
251
@@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
252
return ret;
253
}
254
255
-Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
256
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
257
MemOpIdx oi, uintptr_t ra)
258
{
259
MemOp mop = get_memop(oi);
260
@@ -XXX,XX +XXX,XX @@ static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
261
clear_helper_retaddr();
262
}
263
264
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
265
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
266
MemOpIdx oi, uintptr_t ra)
267
{
268
do_st1_mmu(env, addr, val, get_memop(oi), ra);
269
@@ -XXX,XX +XXX,XX @@ static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
270
clear_helper_retaddr();
271
}
272
273
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
274
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
275
MemOpIdx oi, uintptr_t ra)
276
{
277
MemOp mop = get_memop(oi);
278
@@ -XXX,XX +XXX,XX @@ static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
279
clear_helper_retaddr();
280
}
281
282
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
283
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
284
MemOpIdx oi, uintptr_t ra)
285
{
286
MemOp mop = get_memop(oi);
287
@@ -XXX,XX +XXX,XX @@ static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
288
clear_helper_retaddr();
289
}
290
291
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
292
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
293
MemOpIdx oi, uintptr_t ra)
294
{
295
MemOp mop = get_memop(oi);
296
@@ -XXX,XX +XXX,XX @@ static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
297
clear_helper_retaddr();
298
}
299
300
-void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
301
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
302
MemOpIdx oi, uintptr_t ra)
303
{
304
MemOp mop = get_memop(oi);
305
diff --git a/tcg/tcg.c b/tcg/tcg.c
306
index XXXXXXX..XXXXXXX 100644
307
--- a/tcg/tcg.c
308
+++ b/tcg/tcg.c
309
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld32_mmu = {
310
.flags = TCG_CALL_NO_WG,
311
.typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
312
| dh_typemask(env, 1)
313
- | dh_typemask(tl, 2) /* target_ulong addr */
314
+ | dh_typemask(i64, 2) /* uint64_t addr */
315
| dh_typemask(i32, 3) /* unsigned oi */
316
| dh_typemask(ptr, 4) /* uintptr_t ra */
317
};
318
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld64_mmu = {
319
.flags = TCG_CALL_NO_WG,
320
.typemask = dh_typemask(i64, 0) /* return uint64_t */
321
| dh_typemask(env, 1)
322
- | dh_typemask(tl, 2) /* target_ulong addr */
323
+ | dh_typemask(i64, 2) /* uint64_t addr */
324
| dh_typemask(i32, 3) /* unsigned oi */
325
| dh_typemask(ptr, 4) /* uintptr_t ra */
326
};
327
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld128_mmu = {
328
.flags = TCG_CALL_NO_WG,
329
.typemask = dh_typemask(i128, 0) /* return Int128 */
330
| dh_typemask(env, 1)
331
- | dh_typemask(tl, 2) /* target_ulong addr */
332
+ | dh_typemask(i64, 2) /* uint64_t addr */
333
| dh_typemask(i32, 3) /* unsigned oi */
334
| dh_typemask(ptr, 4) /* uintptr_t ra */
335
};
336
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st32_mmu = {
337
.flags = TCG_CALL_NO_WG,
338
.typemask = dh_typemask(void, 0)
339
| dh_typemask(env, 1)
340
- | dh_typemask(tl, 2) /* target_ulong addr */
341
+ | dh_typemask(i64, 2) /* uint64_t addr */
342
| dh_typemask(i32, 3) /* uint32_t data */
343
| dh_typemask(i32, 4) /* unsigned oi */
344
| dh_typemask(ptr, 5) /* uintptr_t ra */
345
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st64_mmu = {
346
.flags = TCG_CALL_NO_WG,
347
.typemask = dh_typemask(void, 0)
348
| dh_typemask(env, 1)
349
- | dh_typemask(tl, 2) /* target_ulong addr */
350
+ | dh_typemask(i64, 2) /* uint64_t addr */
351
| dh_typemask(i64, 3) /* uint64_t data */
352
| dh_typemask(i32, 4) /* unsigned oi */
353
| dh_typemask(ptr, 5) /* uintptr_t ra */
354
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st128_mmu = {
355
.flags = TCG_CALL_NO_WG,
356
.typemask = dh_typemask(void, 0)
357
| dh_typemask(env, 1)
358
- | dh_typemask(tl, 2) /* target_ulong addr */
359
+ | dh_typemask(i64, 2) /* uint64_t addr */
360
| dh_typemask(i128, 3) /* Int128 data */
361
| dh_typemask(i32, 4) /* unsigned oi */
362
| dh_typemask(ptr, 5) /* uintptr_t ra */
363
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
364
next_arg = 1;
365
366
loc = &info->in[next_arg];
367
- nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
368
- ldst->addrlo_reg, ldst->addrhi_reg);
369
- next_arg += nmov;
370
+ if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
371
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
372
+ ldst->addrlo_reg, ldst->addrhi_reg);
373
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
374
+ next_arg += nmov;
375
+ } else {
376
+ /*
377
+ * 32-bit host with 32-bit guest: zero-extend the guest address
378
+ * to 64-bits for the helper by storing the low part, then
379
+ * load a zero for the high part.
380
+ */
381
+ tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
382
+ TCG_TYPE_I32, TCG_TYPE_I32,
383
+ ldst->addrlo_reg, -1);
384
+ tcg_out_helper_load_slots(s, 1, mov, parm);
385
386
- tcg_out_helper_load_slots(s, nmov, mov, parm);
387
+ tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
388
+ TCG_TYPE_I32, 0, parm);
389
+ next_arg += 2;
390
+ }
391
392
switch (info->out_kind) {
393
case TCG_CALL_RET_NORMAL:
394
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
395
396
/* Handle addr argument. */
397
loc = &info->in[next_arg];
398
- n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
399
- ldst->addrlo_reg, ldst->addrhi_reg);
400
- next_arg += n;
401
- nmov += n;
402
+ if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
403
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
404
+ ldst->addrlo_reg, ldst->addrhi_reg);
405
+ next_arg += n;
406
+ nmov += n;
407
+ } else {
408
+ /*
409
+ * 32-bit host with 32-bit guest: zero-extend the guest address
410
+ * to 64-bits for the helper by storing the low part. Later,
411
+ * after we have processed the register inputs, we will load a
412
+ * zero for the high part.
413
+ */
414
+ tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
415
+ TCG_TYPE_I32, TCG_TYPE_I32,
416
+ ldst->addrlo_reg, -1);
417
+ next_arg += 2;
418
+ nmov += 1;
419
+ }
420
421
/* Handle data argument. */
422
loc = &info->in[next_arg];
423
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
424
g_assert_not_reached();
425
}
426
427
+ if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32) {
428
+ loc = &info->in[1 + !HOST_BIG_ENDIAN];
429
+ tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
430
+ }
37
+
431
+
38
+ tlb_mmu_resize_locked(desc, fast);
432
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
39
+ tlb_mmu_flush_locked(desc, fast);
433
}
40
+}
434
41
+
42
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
43
{
44
env_tlb(env)->d[mmu_idx].n_used_entries++;
45
--
435
--
46
2.20.1
436
2.34.1
47
437
48
438
diff view generated by jsdifflib
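[Editor's note] The 32-bit-host branch added above passes the 64-bit address to the helper in two 32-bit argument slots, putting the low word in slot HOST_BIG_ENDIAN and a zero in the other. The following toy sketch uses illustrative names (only HOST_BIG_ENDIAN corresponds to a real macro, re-defined here so the example stands alone) to show the same slot ordering and how a callee would reassemble the value:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for QEMU's macro; 1 on big-endian hosts. */
    #ifndef HOST_BIG_ENDIAN
    #define HOST_BIG_ENDIAN 0
    #endif

    /* Split a 64-bit guest address into two 32-bit call slots: the low word
       goes in slot[HOST_BIG_ENDIAN], the high word (zero for a 32-bit guest)
       in the other slot. */
    static void split_addr(uint64_t addr, uint32_t slot[2])
    {
        slot[HOST_BIG_ENDIAN]  = (uint32_t)addr;
        slot[!HOST_BIG_ENDIAN] = (uint32_t)(addr >> 32);
    }

    /* Reassemble on the callee side. */
    static uint64_t join_addr(const uint32_t slot[2])
    {
        return (uint64_t)slot[!HOST_BIG_ENDIAN] << 32 | slot[HOST_BIG_ENDIAN];
    }

    int main(void)
    {
        uint32_t slot[2];

        split_addr(0x12345678u, slot);    /* 32-bit guest: high word is 0 */
        printf("0x%llx\n", (unsigned long long)join_addr(slot));
        return 0;
    }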
1
There's little point in leaving these data structures half initialized,
1
Always pass the target address as uint64_t.
2
and relying on a flush to be done during reset.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
accel/tcg/cputlb.c | 5 +++--
6
accel/tcg/tcg-runtime.h | 4 ++--
9
1 file changed, 3 insertions(+), 2 deletions(-)
7
accel/tcg/cputlb.c | 5 ++---
8
accel/tcg/user-exec.c | 5 ++---
9
tcg/tcg-op-ldst.c | 26 ++++++++++++++++++++++++--
10
4 files changed, 30 insertions(+), 10 deletions(-)
10
11
12
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/tcg-runtime.h
15
+++ b/accel/tcg/tcg-runtime.h
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
17
DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
18
#endif /* IN_HELPER_PROTO */
19
20
-DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, tl, i32)
21
-DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, tl, i128, i32)
22
+DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, i64, i32)
23
+DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32)
24
25
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
26
i32, env, tl, i32, i32, i32)
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
27
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
29
--- a/accel/tcg/cputlb.c
14
+++ b/accel/tcg/cputlb.c
30
+++ b/accel/tcg/cputlb.c
15
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
31
@@ -XXX,XX +XXX,XX @@ Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
16
fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
32
return do_ld16_mmu(env, addr, oi, retaddr);
17
fast->table = g_new(CPUTLBEntry, n_entries);
18
desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
19
+ tlb_mmu_flush_locked(desc, fast);
20
}
33
}
21
34
22
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
35
-Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, uint32_t oi)
23
@@ -XXX,XX +XXX,XX @@ void tlb_init(CPUState *cpu)
36
+Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, uint32_t oi)
24
37
{
25
qemu_spin_init(&env_tlb(env)->c.lock);
38
return helper_ld16_mmu(env, addr, oi, GETPC());
26
39
}
27
- /* Ensure that cpu_reset performs a full flush. */
40
@@ -XXX,XX +XXX,XX @@ void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
28
- env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
41
do_st16_mmu(env, addr, val, oi, retaddr);
29
+ /* All tlbs are initialized flushed. */
42
}
30
+ env_tlb(env)->c.dirty = 0;
43
31
44
-void helper_st_i128(CPUArchState *env, target_ulong addr, Int128 val,
32
for (i = 0; i < NB_MMU_MODES; i++) {
45
- MemOpIdx oi)
33
tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
46
+void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
47
{
48
helper_st16_mmu(env, addr, val, oi, GETPC());
49
}
50
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/accel/tcg/user-exec.c
53
+++ b/accel/tcg/user-exec.c
54
@@ -XXX,XX +XXX,XX @@ Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
55
return ret;
56
}
57
58
-Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, MemOpIdx oi)
59
+Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, MemOpIdx oi)
60
{
61
return helper_ld16_mmu(env, addr, oi, GETPC());
62
}
63
@@ -XXX,XX +XXX,XX @@ void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
64
do_st16_he_mmu(env, addr, val, mop, ra);
65
}
66
67
-void helper_st_i128(CPUArchState *env, target_ulong addr,
68
- Int128 val, MemOpIdx oi)
69
+void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
70
{
71
helper_st16_mmu(env, addr, val, oi, GETPC());
72
}
73
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/tcg/tcg-op-ldst.c
76
+++ b/tcg/tcg-op-ldst.c
77
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
78
#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
79
#endif
80
81
+static TCGv_i64 maybe_extend_addr64(TCGv addr)
82
+{
83
+#if TARGET_LONG_BITS == 32
84
+ TCGv_i64 a64 = tcg_temp_ebb_new_i64();
85
+ tcg_gen_extu_i32_i64(a64, addr);
86
+ return a64;
87
+#else
88
+ return addr;
89
+#endif
90
+}
91
+
92
+static void maybe_free_addr64(TCGv_i64 a64)
93
+{
94
+#if TARGET_LONG_BITS == 32
95
+ tcg_temp_free_i64(a64);
96
+#endif
97
+}
98
+
99
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
100
{
101
const MemOpIdx oi = make_memop_idx(memop, idx);
102
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
103
tcg_gen_bswap64_i64(y, y);
104
}
105
} else {
106
- gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
107
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
108
+ gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
109
+ maybe_free_addr64(a64);
110
}
111
112
plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
113
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
114
}
115
tcg_temp_free(addr_p8);
116
} else {
117
- gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
118
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
119
+ gen_helper_st_i128(cpu_env, a64, val, tcg_constant_i32(oi));
120
+ maybe_free_addr64(a64);
121
}
122
123
plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
34
--
124
--
35
2.20.1
125
2.34.1
36
126
37
127
diff view generated by jsdifflib
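[Editor's note] maybe_extend_addr64() above zero-extends a 32-bit guest address into a fresh 64-bit temporary before the i128 helpers are called. Zero- rather than sign-extension matters for addresses; a hedged, QEMU-independent illustration of the difference at the C level (two's-complement conversions assumed):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t gva = 0x80001000u;                       /* a high 32-bit guest address */

        uint64_t zext = (uint64_t)gva;                    /* what a zero-extend produces */
        uint64_t sext = (uint64_t)(int64_t)(int32_t)gva;  /* what a sign-extend would produce */

        printf("zero-extended: 0x%016llx\n", (unsigned long long)zext);
        /* 0x0000000080001000 - still inside the 32-bit guest space */
        printf("sign-extended: 0x%016llx\n", (unsigned long long)sext);
        /* 0xffffffff80001000 - no longer a valid 32-bit guest address */
        return 0;
    }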
New patch
1
1
Always pass the target address as uint64_t.
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
accel/tcg/tcg-runtime.h | 46 +++++++++++++++++------------------
7
tcg/tcg-op-ldst.c | 38 ++++++++++++++++++++---------
8
accel/tcg/atomic_common.c.inc | 14 +++++------
9
3 files changed, 57 insertions(+), 41 deletions(-)
10
11
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/tcg-runtime.h
14
+++ b/accel/tcg/tcg-runtime.h
15
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, i64, i32)
16
DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32)
17
18
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
19
- i32, env, tl, i32, i32, i32)
20
+ i32, env, i64, i32, i32, i32)
21
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
22
- i32, env, tl, i32, i32, i32)
23
+ i32, env, i64, i32, i32, i32)
24
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_le, TCG_CALL_NO_WG,
25
- i32, env, tl, i32, i32, i32)
26
+ i32, env, i64, i32, i32, i32)
27
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_be, TCG_CALL_NO_WG,
28
- i32, env, tl, i32, i32, i32)
29
+ i32, env, i64, i32, i32, i32)
30
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_le, TCG_CALL_NO_WG,
31
- i32, env, tl, i32, i32, i32)
32
+ i32, env, i64, i32, i32, i32)
33
#ifdef CONFIG_ATOMIC64
34
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
35
- i64, env, tl, i64, i64, i32)
36
+ i64, env, i64, i64, i64, i32)
37
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
38
- i64, env, tl, i64, i64, i32)
39
+ i64, env, i64, i64, i64, i32)
40
#endif
41
#ifdef CONFIG_CMPXCHG128
42
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG,
43
- i128, env, tl, i128, i128, i32)
44
+ i128, env, i64, i128, i128, i32)
45
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG,
46
- i128, env, tl, i128, i128, i32)
47
+ i128, env, i64, i128, i128, i32)
48
#endif
49
50
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_be, TCG_CALL_NO_WG,
51
- i128, env, tl, i128, i128, i32)
52
+ i128, env, i64, i128, i128, i32)
53
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_le, TCG_CALL_NO_WG,
54
- i128, env, tl, i128, i128, i32)
55
+ i128, env, i64, i128, i128, i32)
56
57
#ifdef CONFIG_ATOMIC64
58
#define GEN_ATOMIC_HELPERS(NAME) \
59
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
60
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
61
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
62
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
63
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
64
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
65
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
66
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
67
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
68
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
69
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
70
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
71
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
72
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
73
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
74
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_le), \
75
- TCG_CALL_NO_WG, i64, env, tl, i64, i32) \
76
+ TCG_CALL_NO_WG, i64, env, i64, i64, i32) \
77
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_be), \
78
- TCG_CALL_NO_WG, i64, env, tl, i64, i32)
79
+ TCG_CALL_NO_WG, i64, env, i64, i64, i32)
80
#else
81
#define GEN_ATOMIC_HELPERS(NAME) \
82
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
83
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
84
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
85
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
86
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
87
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
88
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
89
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
90
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
91
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
92
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
93
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
94
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
95
- TCG_CALL_NO_WG, i32, env, tl, i32, i32)
96
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32)
97
#endif /* CONFIG_ATOMIC64 */
98
99
GEN_ATOMIC_HELPERS(fetch_add)
100
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/tcg/tcg-op-ldst.c
103
+++ b/tcg/tcg-op-ldst.c
104
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
105
}
106
}
107
108
-typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
109
+typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
110
TCGv_i32, TCGv_i32, TCGv_i32);
111
-typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
112
+typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
113
TCGv_i64, TCGv_i64, TCGv_i32);
114
-typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
115
+typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
116
TCGv_i128, TCGv_i128, TCGv_i32);
117
-typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
118
+typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
119
TCGv_i32, TCGv_i32);
120
-typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
121
+typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
122
TCGv_i64, TCGv_i32);
123
124
#ifdef CONFIG_ATOMIC64
125
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
126
TCGv_i32 newv, TCGArg idx, MemOp memop)
127
{
128
gen_atomic_cx_i32 gen;
129
+ TCGv_i64 a64;
130
MemOpIdx oi;
131
132
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
133
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
134
tcg_debug_assert(gen != NULL);
135
136
oi = make_memop_idx(memop & ~MO_SIGN, idx);
137
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
138
+ a64 = maybe_extend_addr64(addr);
139
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
140
+ maybe_free_addr64(a64);
141
142
if (memop & MO_SIGN) {
143
tcg_gen_ext_i32(retv, retv, memop);
144
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
145
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
146
if (gen) {
147
MemOpIdx oi = make_memop_idx(memop, idx);
148
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
149
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
150
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
151
+ maybe_free_addr64(a64);
152
return;
153
}
154
155
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
156
? gen_helper_nonatomic_cmpxchgo_le
157
: gen_helper_nonatomic_cmpxchgo_be);
158
MemOpIdx oi = make_memop_idx(memop, idx);
159
+ TCGv_i64 a64;
160
161
tcg_debug_assert((memop & MO_SIZE) == MO_128);
162
tcg_debug_assert((memop & MO_SIGN) == 0);
163
164
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
165
+ a64 = maybe_extend_addr64(addr);
166
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
167
+ maybe_free_addr64(a64);
168
} else {
169
TCGv_i128 oldv = tcg_temp_ebb_new_i128();
170
TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
171
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
172
173
if (gen) {
174
MemOpIdx oi = make_memop_idx(memop, idx);
175
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
176
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
177
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
178
+ maybe_free_addr64(a64);
179
return;
180
}
181
182
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
183
TCGArg idx, MemOp memop, void * const table[])
184
{
185
gen_atomic_op_i32 gen;
186
+ TCGv_i64 a64;
187
MemOpIdx oi;
188
189
memop = tcg_canonicalize_memop(memop, 0, 0);
190
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
191
tcg_debug_assert(gen != NULL);
192
193
oi = make_memop_idx(memop & ~MO_SIGN, idx);
194
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
195
+ a64 = maybe_extend_addr64(addr);
196
+ gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
197
+ maybe_free_addr64(a64);
198
199
if (memop & MO_SIGN) {
200
tcg_gen_ext_i32(ret, ret, memop);
201
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
202
if ((memop & MO_SIZE) == MO_64) {
203
#ifdef CONFIG_ATOMIC64
204
gen_atomic_op_i64 gen;
205
+ TCGv_i64 a64;
206
MemOpIdx oi;
207
208
gen = table[memop & (MO_SIZE | MO_BSWAP)];
209
tcg_debug_assert(gen != NULL);
210
211
oi = make_memop_idx(memop & ~MO_SIGN, idx);
212
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
213
+ a64 = maybe_extend_addr64(addr);
214
+ gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
215
+ maybe_free_addr64(a64);
216
#else
217
gen_helper_exit_atomic(cpu_env);
218
/* Produce a result, so that we have a well-formed opcode stream
219
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
220
index XXXXXXX..XXXXXXX 100644
221
--- a/accel/tcg/atomic_common.c.inc
222
+++ b/accel/tcg/atomic_common.c.inc
223
@@ -XXX,XX +XXX,XX @@
224
* See the COPYING file in the top-level directory.
225
*/
226
227
-static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr,
228
+static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
229
MemOpIdx oi)
230
{
231
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW);
232
}
233
234
#if HAVE_ATOMIC128
235
-static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr,
236
+static void atomic_trace_ld_post(CPUArchState *env, uint64_t addr,
237
MemOpIdx oi)
238
{
239
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
240
}
241
242
-static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
243
+static void atomic_trace_st_post(CPUArchState *env, uint64_t addr,
244
MemOpIdx oi)
245
{
246
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
247
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
248
*/
249
250
#define CMPXCHG_HELPER(OP, TYPE) \
251
- TYPE HELPER(atomic_##OP)(CPUArchState *env, target_ulong addr, \
252
+ TYPE HELPER(atomic_##OP)(CPUArchState *env, uint64_t addr, \
253
TYPE oldv, TYPE newv, uint32_t oi) \
254
{ return cpu_atomic_##OP##_mmu(env, addr, oldv, newv, oi, GETPC()); }
255
256
@@ -XXX,XX +XXX,XX @@ CMPXCHG_HELPER(cmpxchgo_le, Int128)
257
258
#undef CMPXCHG_HELPER
259
260
-Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
261
+Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, uint64_t addr,
262
Int128 cmpv, Int128 newv, uint32_t oi)
263
{
264
#if TCG_TARGET_REG_BITS == 32
265
@@ -XXX,XX +XXX,XX @@ Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
266
#endif
267
}
268
269
-Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
270
+Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, uint64_t addr,
271
Int128 cmpv, Int128 newv, uint32_t oi)
272
{
273
#if TCG_TARGET_REG_BITS == 32
274
@@ -XXX,XX +XXX,XX @@ Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
275
}
276
277
#define ATOMIC_HELPER(OP, TYPE) \
278
- TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, target_ulong addr, \
279
+ TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, uint64_t addr, \
280
TYPE val, uint32_t oi) \
281
{ return glue(glue(cpu_atomic_,OP),_mmu)(env, addr, val, oi, GETPC()); }
282
283
--
284
2.34.1
285
286
diff view generated by jsdifflib
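[Editor's note] The cmpxchg helpers above keep the usual compare-and-swap contract: the value previously in memory is returned, and the store takes effect only if that value matched the expected one. A host-side sketch of that contract using C11 atomics; this stands in for the semantics only and skips the guest-address handling the real helpers perform:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Returns the value that was in *ptr; newv is stored only if that
       value equalled oldv. */
    static uint32_t cmpxchg32(_Atomic uint32_t *ptr, uint32_t oldv, uint32_t newv)
    {
        uint32_t expected = oldv;

        atomic_compare_exchange_strong(ptr, &expected, newv);
        return expected;    /* == oldv on success, current value on failure */
    }

    int main(void)
    {
        _Atomic uint32_t mem = 41;
        uint32_t seen = cmpxchg32(&mem, 41, 42);

        printf("seen=%u mem=%u\n", seen, (unsigned)atomic_load(&mem));  /* seen=41 mem=42 */
        return 0;
    }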
New patch
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
include/tcg/tcg.h | 2 +-
5
tcg/tcg.c | 2 +-
6
2 files changed, 2 insertions(+), 2 deletions(-)
1
7
8
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
9
index XXXXXXX..XXXXXXX 100644
10
--- a/include/tcg/tcg.h
11
+++ b/include/tcg/tcg.h
12
@@ -XXX,XX +XXX,XX @@ void tcg_register_thread(void);
13
void tcg_prologue_init(TCGContext *s);
14
void tcg_func_start(TCGContext *s);
15
16
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start);
17
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start);
18
19
void tb_target_set_jmp_target(const TranslationBlock *, int,
20
uintptr_t, uintptr_t);
21
diff --git a/tcg/tcg.c b/tcg/tcg.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/tcg/tcg.c
24
+++ b/tcg/tcg.c
25
@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void)
26
#endif
27
28
29
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
30
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
31
{
32
#ifdef CONFIG_PROFILER
33
TCGProfile *prof = &s->prof;
34
--
35
2.34.1
36
37
diff view generated by jsdifflib
New patch
1
As gen_mem_wrapped is only used in plugin_gen_empty_mem_callback,
2
we can avoid the curiosity of union mem_gen_fn by inlining it.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
accel/tcg/plugin-gen.c | 30 ++++++------------------------
8
1 file changed, 6 insertions(+), 24 deletions(-)
9
10
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/accel/tcg/plugin-gen.c
13
+++ b/accel/tcg/plugin-gen.c
14
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_empty_callback(enum plugin_gen_from from)
15
}
16
}
17
18
-union mem_gen_fn {
19
- void (*mem_fn)(TCGv, uint32_t);
20
- void (*inline_fn)(void);
21
-};
22
-
23
-static void gen_mem_wrapped(enum plugin_gen_cb type,
24
- const union mem_gen_fn *f, TCGv addr,
25
- uint32_t info, bool is_mem)
26
+void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
27
{
28
enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
29
30
- gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, rw);
31
- if (is_mem) {
32
- f->mem_fn(addr, info);
33
- } else {
34
- f->inline_fn();
35
- }
36
+ gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, PLUGIN_GEN_CB_MEM, rw);
37
+ gen_empty_mem_cb(addr, info);
38
tcg_gen_plugin_cb_end();
39
-}
40
41
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
42
-{
43
- union mem_gen_fn fn;
44
-
45
- fn.mem_fn = gen_empty_mem_cb;
46
- gen_mem_wrapped(PLUGIN_GEN_CB_MEM, &fn, addr, info, true);
47
-
48
- fn.inline_fn = gen_empty_inline_cb;
49
- gen_mem_wrapped(PLUGIN_GEN_CB_INLINE, &fn, 0, info, false);
50
+ gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, PLUGIN_GEN_CB_INLINE, rw);
51
+ gen_empty_inline_cb();
52
+ tcg_gen_plugin_cb_end();
53
}
54
55
static TCGOp *find_op(TCGOp *op, TCGOpcode opc)
56
--
57
2.34.1
58
59
diff view generated by jsdifflib
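[Editor's note] The refactoring above removes a dispatch through union mem_gen_fn and an is_mem flag in favour of two direct call sites. A toy before/after of the same shape, detached from the plugin code (all names invented for the example):

    #include <stdio.h>

    static void mem_cb(int addr, unsigned info)  { printf("mem %d %u\n", addr, info); }
    static void inline_cb(void)                  { printf("inline\n"); }

    /* Before: one wrapper picks the callee through a union plus a flag. */
    union gen_fn {
        void (*mem_fn)(int, unsigned);
        void (*inline_fn)(void);
    };

    static void wrapped(const union gen_fn *f, int addr, unsigned info, int is_mem)
    {
        if (is_mem) {
            f->mem_fn(addr, info);
        } else {
            f->inline_fn();
        }
    }

    /* After: the single caller simply emits both callbacks directly. */
    static void unwrapped(int addr, unsigned info)
    {
        mem_cb(addr, info);
        inline_cb();
    }

    int main(void)
    {
        union gen_fn f = { .mem_fn = mem_cb };

        wrapped(&f, 1, 2, 1);
        f.inline_fn = inline_cb;
        wrapped(&f, 0, 2, 0);

        unwrapped(1, 2);
        return 0;
    }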
New patch
1
As do_gen_mem_cb is called once, merge it into gen_empty_mem_cb.
1
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
accel/tcg/plugin-gen.c | 39 +++++++++++++++++----------------------
7
1 file changed, 17 insertions(+), 22 deletions(-)
8
9
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/accel/tcg/plugin-gen.c
12
+++ b/accel/tcg/plugin-gen.c
13
@@ -XXX,XX +XXX,XX @@ void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index,
14
void *userdata)
15
{ }
16
17
-static void do_gen_mem_cb(TCGv vaddr, uint32_t info)
18
-{
19
- TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
20
- TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
21
- TCGv_i64 vaddr64 = tcg_temp_ebb_new_i64();
22
- TCGv_ptr udata = tcg_temp_ebb_new_ptr();
23
-
24
- tcg_gen_movi_i32(meminfo, info);
25
- tcg_gen_movi_ptr(udata, 0);
26
- tcg_gen_ld_i32(cpu_index, cpu_env,
27
- -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
28
- tcg_gen_extu_tl_i64(vaddr64, vaddr);
29
-
30
- gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, vaddr64, udata);
31
-
32
- tcg_temp_free_ptr(udata);
33
- tcg_temp_free_i64(vaddr64);
34
- tcg_temp_free_i32(meminfo);
35
- tcg_temp_free_i32(cpu_index);
36
-}
37
-
38
static void gen_empty_udata_cb(void)
39
{
40
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
41
@@ -XXX,XX +XXX,XX @@ static void gen_empty_inline_cb(void)
42
43
static void gen_empty_mem_cb(TCGv addr, uint32_t info)
44
{
45
- do_gen_mem_cb(addr, info);
46
+ TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
47
+ TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
48
+ TCGv_i64 addr64 = tcg_temp_ebb_new_i64();
49
+ TCGv_ptr udata = tcg_temp_ebb_new_ptr();
50
+
51
+ tcg_gen_movi_i32(meminfo, info);
52
+ tcg_gen_movi_ptr(udata, 0);
53
+ tcg_gen_ld_i32(cpu_index, cpu_env,
54
+ -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
55
+ tcg_gen_extu_tl_i64(addr64, addr);
56
+
57
+ gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr64, udata);
58
+
59
+ tcg_temp_free_ptr(udata);
60
+ tcg_temp_free_i64(addr64);
61
+ tcg_temp_free_i32(meminfo);
62
+ tcg_temp_free_i32(cpu_index);
63
}
64
65
/*
66
--
67
2.34.1
68
69
diff view generated by jsdifflib
1
Do not call get_clock_realtime() in tlb_mmu_resize_locked,
1
We only need to make copies for loads, when the destination
2
but hoist it outside of any loop over a set of tlbs. There are
2
overlaps the address. For now, only eliminate the copy for
3
only two (indirect) callers, tlb_flush_by_mmuidx_async_work
3
stores and 128-bit loads.
4
and tlb_flush_page_locked, so not onerous.
4
5
Rename plugin_prep_mem_callbacks to plugin_maybe_preserve_addr,
6
returning NULL if no copy is made.
5
7
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
10
---
11
accel/tcg/cputlb.c | 14 ++++++++------
11
tcg/tcg-op-ldst.c | 38 ++++++++++++++++++++------------------
12
1 file changed, 8 insertions(+), 6 deletions(-)
12
1 file changed, 20 insertions(+), 18 deletions(-)
13
13
14
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
14
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/accel/tcg/cputlb.c
16
--- a/tcg/tcg-op-ldst.c
17
+++ b/accel/tcg/cputlb.c
17
+++ b/tcg/tcg-op-ldst.c
18
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
18
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)
19
* high), since otherwise we are likely to have a significant amount of
19
}
20
* conflict misses.
20
}
21
*/
21
22
-static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
22
-static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
23
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
23
+/* Only required for loads, where value might overlap addr. */
24
+ int64_t now)
24
+static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
25
{
25
{
26
size_t old_size = tlb_n_entries(fast);
26
#ifdef CONFIG_PLUGIN
27
size_t rate;
27
if (tcg_ctx->plugin_insn != NULL) {
28
size_t new_size = old_size;
28
@@ -XXX,XX +XXX,XX @@ static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
29
- int64_t now = get_clock_realtime();
29
return temp;
30
int64_t window_len_ms = 100;
30
}
31
int64_t window_len_ns = window_len_ms * 1000 * 1000;
31
#endif
32
bool window_expired = now > desc->window_begin_ns + window_len_ns;
32
- return vaddr;
33
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
33
+ return NULL;
34
memset(desc->vtable, -1, sizeof(desc->vtable));
35
}
34
}
36
35
37
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
36
-static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
38
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
37
- enum qemu_plugin_mem_rw rw)
39
+ int64_t now)
38
+static void
39
+plugin_gen_mem_callbacks(TCGv copy_addr, TCGv orig_addr, MemOpIdx oi,
40
+ enum qemu_plugin_mem_rw rw)
40
{
41
{
41
CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
42
#ifdef CONFIG_PLUGIN
42
CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
43
if (tcg_ctx->plugin_insn != NULL) {
43
44
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
44
- tlb_mmu_resize_locked(desc, fast);
45
- plugin_gen_empty_mem_callback(vaddr, info);
45
+ tlb_mmu_resize_locked(desc, fast, now);
46
- tcg_temp_free(vaddr);
46
tlb_mmu_flush_locked(desc, fast);
47
+ plugin_gen_empty_mem_callback(copy_addr ? : orig_addr, info);
48
+ if (copy_addr) {
49
+ tcg_temp_free(copy_addr);
50
+ }
51
}
52
#endif
47
}
53
}
48
54
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
49
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
55
{
50
CPUArchState *env = cpu->env_ptr;
56
MemOp orig_memop;
51
uint16_t asked = data.host_int;
57
MemOpIdx oi;
52
uint16_t all_dirty, work, to_clean;
58
+ TCGv copy_addr;
53
+ int64_t now = get_clock_realtime();
59
54
60
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
55
assert_cpu_is_self(cpu);
61
memop = tcg_canonicalize_memop(memop, 0, 0);
56
62
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
57
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
63
}
58
59
for (work = to_clean; work != 0; work &= work - 1) {
60
int mmu_idx = ctz32(work);
61
- tlb_flush_one_mmuidx_locked(env, mmu_idx);
62
+ tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
63
}
64
}
64
65
65
qemu_spin_unlock(&env_tlb(env)->c.lock);
66
- addr = plugin_prep_mem_callbacks(addr);
66
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
67
+ copy_addr = plugin_maybe_preserve_addr(addr);
67
tlb_debug("forcing full flush midx %d ("
68
gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
68
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
69
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
69
midx, lp_addr, lp_mask);
70
+ plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
70
- tlb_flush_one_mmuidx_locked(env, midx);
71
71
+ tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
72
if ((orig_memop ^ memop) & MO_BSWAP) {
73
switch (orig_memop & MO_SIZE) {
74
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
75
memop &= ~MO_BSWAP;
76
}
77
78
- addr = plugin_prep_mem_callbacks(addr);
79
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
80
gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
72
} else {
81
} else {
73
if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
82
gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
74
tlb_n_used_entries_dec(env, midx);
83
}
84
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
85
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
86
87
if (swap) {
88
tcg_temp_free_i32(swap);
89
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
90
{
91
MemOp orig_memop;
92
MemOpIdx oi;
93
+ TCGv copy_addr;
94
95
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
96
tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
97
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
98
}
99
}
100
101
- addr = plugin_prep_mem_callbacks(addr);
102
+ copy_addr = plugin_maybe_preserve_addr(addr);
103
gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
104
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
105
+ plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
106
107
if ((orig_memop ^ memop) & MO_BSWAP) {
108
int flags = (orig_memop & MO_SIGN
109
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
110
memop &= ~MO_BSWAP;
111
}
112
113
- addr = plugin_prep_mem_callbacks(addr);
114
gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
115
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
116
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
117
118
if (swap) {
119
tcg_temp_free_i64(swap);
120
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
121
tcg_debug_assert((memop & MO_SIGN) == 0);
122
123
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
124
- addr = plugin_prep_mem_callbacks(addr);
125
126
/* TODO: For now, force 32-bit hosts to use the helper. */
127
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
128
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
129
maybe_free_addr64(a64);
130
}
131
132
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
133
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_R);
134
}
135
136
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
137
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
138
tcg_debug_assert((memop & MO_SIGN) == 0);
139
140
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
141
- addr = plugin_prep_mem_callbacks(addr);
142
143
/* TODO: For now, force 32-bit hosts to use the helper. */
144
145
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
146
maybe_free_addr64(a64);
147
}
148
149
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
150
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
151
}
152
153
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
75
--
154
--
76
2.20.1
155
2.34.1
77
156
78
157
diff view generated by jsdifflib
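[Editor's note] The copy is needed because a load may write its result into the very temporary that held the address, so a callback emitted after the load would observe data rather than the address; stores never clobber their address, which is why their copy can be dropped. A plain-C caricature of the hazard, with ordinary variables standing in for TCG temporaries:

    #include <stdint.h>
    #include <stdio.h>

    static uint8_t guest_ram[16] = { 0xaa, 0xbb, 0xcc, 0xdd };

    static void trace_access(uint64_t addr)
    {
        printf("access at 0x%llx\n", (unsigned long long)addr);
    }

    int main(void)
    {
        uint64_t reg = 2;           /* the temporary holding the address */
        uint64_t saved = reg;       /* copy taken before the load */

        reg = guest_ram[reg];       /* the load reuses reg as its destination */

        trace_access(reg);          /* wrong: reports 0xcc, the loaded data */
        trace_access(saved);        /* right: reports 2, the address */
        return 0;
    }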
New patch
1
Since we do this inside gen_empty_mem_cb anyway, let's
2
do it earlier, during tcg expansion.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/exec/plugin-gen.h | 4 ++--
8
accel/tcg/plugin-gen.c | 9 +++------
9
tcg/tcg-op-ldst.c | 28 ++++++++++++++++++++--------
10
3 files changed, 25 insertions(+), 16 deletions(-)
11
12
diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/exec/plugin-gen.h
15
+++ b/include/exec/plugin-gen.h
16
@@ -XXX,XX +XXX,XX @@ void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
17
void plugin_gen_insn_end(void);
18
19
void plugin_gen_disable_mem_helpers(void);
20
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info);
21
+void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info);
22
23
static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
24
{
25
@@ -XXX,XX +XXX,XX @@ static inline void plugin_gen_tb_end(CPUState *cpu)
26
static inline void plugin_gen_disable_mem_helpers(void)
27
{ }
28
29
-static inline void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
30
+static inline void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info)
31
{ }
32
33
static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
34
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/accel/tcg/plugin-gen.c
37
+++ b/accel/tcg/plugin-gen.c
38
@@ -XXX,XX +XXX,XX @@ static void gen_empty_inline_cb(void)
39
tcg_temp_free_i64(val);
40
}
41
42
-static void gen_empty_mem_cb(TCGv addr, uint32_t info)
43
+static void gen_empty_mem_cb(TCGv_i64 addr, uint32_t info)
44
{
45
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
46
TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
47
- TCGv_i64 addr64 = tcg_temp_ebb_new_i64();
48
TCGv_ptr udata = tcg_temp_ebb_new_ptr();
49
50
tcg_gen_movi_i32(meminfo, info);
51
tcg_gen_movi_ptr(udata, 0);
52
tcg_gen_ld_i32(cpu_index, cpu_env,
53
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
54
- tcg_gen_extu_tl_i64(addr64, addr);
55
56
- gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr64, udata);
57
+ gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr, udata);
58
59
tcg_temp_free_ptr(udata);
60
- tcg_temp_free_i64(addr64);
61
tcg_temp_free_i32(meminfo);
62
tcg_temp_free_i32(cpu_index);
63
}
64
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_empty_callback(enum plugin_gen_from from)
65
}
66
}
67
68
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
69
+void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info)
70
{
71
enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
72
73
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/tcg/tcg-op-ldst.c
76
+++ b/tcg/tcg-op-ldst.c
77
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)
78
}
79
80
/* Only required for loads, where value might overlap addr. */
81
-static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
82
+static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
83
{
84
#ifdef CONFIG_PLUGIN
85
if (tcg_ctx->plugin_insn != NULL) {
86
/* Save a copy of the vaddr for use after a load. */
87
- TCGv temp = tcg_temp_new();
88
- tcg_gen_mov_tl(temp, vaddr);
89
+ TCGv_i64 temp = tcg_temp_ebb_new_i64();
90
+ tcg_gen_extu_tl_i64(temp, vaddr);
91
return temp;
92
}
93
#endif
94
@@ -XXX,XX +XXX,XX @@ static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
95
}
96
97
static void
98
-plugin_gen_mem_callbacks(TCGv copy_addr, TCGv orig_addr, MemOpIdx oi,
99
+plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGv orig_addr, MemOpIdx oi,
100
enum qemu_plugin_mem_rw rw)
101
{
102
#ifdef CONFIG_PLUGIN
103
if (tcg_ctx->plugin_insn != NULL) {
104
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
105
- plugin_gen_empty_mem_callback(copy_addr ? : orig_addr, info);
106
+
107
+#if TARGET_LONG_BITS == 64
108
if (copy_addr) {
109
- tcg_temp_free(copy_addr);
110
+ plugin_gen_empty_mem_callback(copy_addr, info);
111
+ tcg_temp_free_i64(copy_addr);
112
+ } else {
113
+ plugin_gen_empty_mem_callback(orig_addr, info);
114
}
115
+#else
116
+ if (!copy_addr) {
117
+ copy_addr = tcg_temp_ebb_new_i64();
118
+ tcg_gen_extu_tl_i64(copy_addr, orig_addr);
119
+ }
120
+ plugin_gen_empty_mem_callback(copy_addr, info);
121
+ tcg_temp_free_i64(copy_addr);
122
+#endif
123
}
124
#endif
125
}
126
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
127
{
128
MemOp orig_memop;
129
MemOpIdx oi;
130
- TCGv copy_addr;
131
+ TCGv_i64 copy_addr;
132
133
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
134
memop = tcg_canonicalize_memop(memop, 0, 0);
135
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
136
{
137
MemOp orig_memop;
138
MemOpIdx oi;
139
- TCGv copy_addr;
140
+ TCGv_i64 copy_addr;
141
142
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
143
tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
144
--
145
2.34.1
146
147
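The practical effect is that plugin memory callbacks always receive the guest
address as a 64-bit value; for 32-bit guests the zero-extension now happens at
the call site instead of inside gen_empty_mem_cb(). A condensed sketch of that
path, assuming CONFIG_PLUGIN (the wrapper function name below is illustrative;
the helpers are the ones touched in this patch):

    /* Illustration: widening a 32-bit guest address for the plugin callback. */
    static void example_plugin_mem_cb(TCGv orig_addr, uint32_t info)
    {
        TCGv_i64 a64 = tcg_temp_ebb_new_i64();

        tcg_gen_extu_tl_i64(a64, orig_addr);      /* zero-extend target_ulong */
        plugin_gen_empty_mem_callback(a64, info); /* now takes TCGv_i64 */
        tcg_temp_free_i64(a64);
    }
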
New patch
1
This will enable replacement of TARGET_LONG_BITS within tcg/.
1
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
include/tcg/tcg.h | 1 +
7
accel/tcg/translate-all.c | 2 ++
8
tcg/tcg.c | 3 +++
9
3 files changed, 6 insertions(+)
10
11
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/include/tcg/tcg.h
14
+++ b/include/tcg/tcg.h
15
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
16
int nb_temps;
17
int nb_indirects;
18
int nb_ops;
19
+ TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */
20
21
TCGRegSet reserved_regs;
22
intptr_t current_frame_offset;
23
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/accel/tcg/translate-all.c
26
+++ b/accel/tcg/translate-all.c
27
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
28
tb_set_page_addr0(tb, phys_pc);
29
tb_set_page_addr1(tb, -1);
30
tcg_ctx->gen_tb = tb;
31
+ tcg_ctx->addr_type = TCG_TYPE_TL;
32
+
33
tb_overflow:
34
35
#ifdef CONFIG_PROFILER
36
diff --git a/tcg/tcg.c b/tcg/tcg.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/tcg/tcg.c
39
+++ b/tcg/tcg.c
40
@@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s)
41
QTAILQ_INIT(&s->ops);
42
QTAILQ_INIT(&s->free_ops);
43
QSIMPLEQ_INIT(&s->labels);
44
+
45
+ tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
46
+ s->addr_type == TCG_TYPE_I64);
47
}
48
49
static TCGTemp *tcg_temp_alloc(TCGContext *s)
50
--
51
2.34.1
52
53
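Recording the address type per TCGContext lets common tcg/ code ask at run time
whether guest addresses are 32-bit or 64-bit, which is what later patches in
this series do in place of TARGET_LONG_BITS ifdefs. A minimal sketch of the
usage pattern (the wrapper function is only an example; the field and the
assertion mirror the hunks above):

    /* Illustration: dispatch on the guest address width of this translation. */
    static bool example_guest_addr_is_32bit(const TCGContext *s)
    {
        tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                         s->addr_type == TCG_TYPE_I64);
        return s->addr_type == TCG_TYPE_I32;
    }
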
New patch
1
Expand from TCGv to TCGTemp inline in the translators,
2
and validate that the size matches tcg_ctx->addr_type.
3
These inlines will eventually be seen only by target-specific code.
1
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
include/tcg/tcg-op.h | 50 ++++++-
9
tcg/tcg-op-ldst.c | 343 ++++++++++++++++++++++++++-----------------
10
2 files changed, 251 insertions(+), 142 deletions(-)
11
12
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/tcg/tcg-op.h
15
+++ b/include/tcg/tcg-op.h
16
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_plugin_cb_end(void)
17
#define tcg_temp_new() tcg_temp_new_i32()
18
#define tcg_global_mem_new tcg_global_mem_new_i32
19
#define tcg_temp_free tcg_temp_free_i32
20
+#define tcgv_tl_temp tcgv_i32_temp
21
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
22
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
23
#else
24
#define tcg_temp_new() tcg_temp_new_i64()
25
#define tcg_global_mem_new tcg_global_mem_new_i64
26
#define tcg_temp_free tcg_temp_free_i64
27
+#define tcgv_tl_temp tcgv_i64_temp
28
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
29
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
30
#endif
31
32
-void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
33
-void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
34
-void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
35
-void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
36
-void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
37
-void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
38
+void tcg_gen_qemu_ld_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
39
+void tcg_gen_qemu_st_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
40
+void tcg_gen_qemu_ld_i64_chk(TCGv_i64, TCGTemp *, TCGArg, MemOp, TCGType);
41
+void tcg_gen_qemu_st_i64_chk(TCGv_i64, TCGTemp *, TCGArg, MemOp, TCGType);
42
+void tcg_gen_qemu_ld_i128_chk(TCGv_i128, TCGTemp *, TCGArg, MemOp, TCGType);
43
+void tcg_gen_qemu_st_i128_chk(TCGv_i128, TCGTemp *, TCGArg, MemOp, TCGType);
44
+
45
+static inline void
46
+tcg_gen_qemu_ld_i32(TCGv_i32 v, TCGv a, TCGArg i, MemOp m)
47
+{
48
+ tcg_gen_qemu_ld_i32_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
49
+}
50
+
51
+static inline void
52
+tcg_gen_qemu_st_i32(TCGv_i32 v, TCGv a, TCGArg i, MemOp m)
53
+{
54
+ tcg_gen_qemu_st_i32_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
55
+}
56
+
57
+static inline void
58
+tcg_gen_qemu_ld_i64(TCGv_i64 v, TCGv a, TCGArg i, MemOp m)
59
+{
60
+ tcg_gen_qemu_ld_i64_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
61
+}
62
+
63
+static inline void
64
+tcg_gen_qemu_st_i64(TCGv_i64 v, TCGv a, TCGArg i, MemOp m)
65
+{
66
+ tcg_gen_qemu_st_i64_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
67
+}
68
+
69
+static inline void
70
+tcg_gen_qemu_ld_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
71
+{
72
+ tcg_gen_qemu_ld_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
73
+}
74
+
75
+static inline void
76
+tcg_gen_qemu_st_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
77
+{
78
+ tcg_gen_qemu_st_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
79
+}
80
81
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
82
TCGArg, MemOp);
83
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/tcg/tcg-op-ldst.c
86
+++ b/tcg/tcg-op-ldst.c
87
@@ -XXX,XX +XXX,XX @@ static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
88
return op;
89
}
90
91
-static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
92
- MemOp memop, TCGArg idx)
93
+static void gen_ldst(TCGOpcode opc, TCGTemp *vl, TCGTemp *vh,
94
+ TCGTemp *addr, MemOpIdx oi)
95
{
96
- MemOpIdx oi = make_memop_idx(memop, idx);
97
-#if TARGET_LONG_BITS == 32
98
- tcg_gen_op3i_i32(opc, val, addr, oi);
99
-#else
100
- if (TCG_TARGET_REG_BITS == 32) {
101
- tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
102
+ if (TCG_TARGET_REG_BITS == 64 || tcg_ctx->addr_type == TCG_TYPE_I32) {
103
+ if (vh) {
104
+ tcg_gen_op4(opc, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi);
105
+ } else {
106
+ tcg_gen_op3(opc, temp_arg(vl), temp_arg(addr), oi);
107
+ }
108
} else {
109
- tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
110
+ /* See TCGV_LOW/HIGH. */
111
+ TCGTemp *al = addr + HOST_BIG_ENDIAN;
112
+ TCGTemp *ah = addr + !HOST_BIG_ENDIAN;
113
+
114
+ if (vh) {
115
+ tcg_gen_op5(opc, temp_arg(vl), temp_arg(vh),
116
+ temp_arg(al), temp_arg(ah), oi);
117
+ } else {
118
+ tcg_gen_op4(opc, temp_arg(vl), temp_arg(al), temp_arg(ah), oi);
119
+ }
120
}
121
-#endif
122
}
123
124
-static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
125
- MemOp memop, TCGArg idx)
126
+static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi)
127
{
128
- MemOpIdx oi = make_memop_idx(memop, idx);
129
-#if TARGET_LONG_BITS == 32
130
if (TCG_TARGET_REG_BITS == 32) {
131
- tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
132
+ TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v));
133
+ TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v));
134
+ gen_ldst(opc, vl, vh, addr, oi);
135
} else {
136
- tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
137
+ gen_ldst(opc, tcgv_i64_temp(v), NULL, addr, oi);
138
}
139
-#else
140
- if (TCG_TARGET_REG_BITS == 32) {
141
- tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
142
- TCGV_LOW(addr), TCGV_HIGH(addr), oi);
143
- } else {
144
- tcg_gen_op3i_i64(opc, val, addr, oi);
145
- }
146
-#endif
147
}
148
149
static void tcg_gen_req_mo(TCGBar type)
150
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)
151
}
152
153
/* Only required for loads, where value might overlap addr. */
154
-static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
155
+static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
156
{
157
#ifdef CONFIG_PLUGIN
158
if (tcg_ctx->plugin_insn != NULL) {
159
/* Save a copy of the vaddr for use after a load. */
160
TCGv_i64 temp = tcg_temp_ebb_new_i64();
161
- tcg_gen_extu_tl_i64(temp, vaddr);
162
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
163
+ tcg_gen_extu_i32_i64(temp, temp_tcgv_i32(addr));
164
+ } else {
165
+ tcg_gen_mov_i64(temp, temp_tcgv_i64(addr));
166
+ }
167
return temp;
168
}
169
#endif
170
@@ -XXX,XX +XXX,XX @@ static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
171
}
172
173
static void
174
-plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGv orig_addr, MemOpIdx oi,
175
+plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
176
enum qemu_plugin_mem_rw rw)
177
{
178
#ifdef CONFIG_PLUGIN
179
if (tcg_ctx->plugin_insn != NULL) {
180
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
181
182
-#if TARGET_LONG_BITS == 64
183
- if (copy_addr) {
184
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
185
+ if (!copy_addr) {
186
+ copy_addr = tcg_temp_ebb_new_i64();
187
+ tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr));
188
+ }
189
plugin_gen_empty_mem_callback(copy_addr, info);
190
tcg_temp_free_i64(copy_addr);
191
} else {
192
- plugin_gen_empty_mem_callback(orig_addr, info);
193
+ if (copy_addr) {
194
+ plugin_gen_empty_mem_callback(copy_addr, info);
195
+ tcg_temp_free_i64(copy_addr);
196
+ } else {
197
+ plugin_gen_empty_mem_callback(temp_tcgv_i64(orig_addr), info);
198
+ }
199
}
200
-#else
201
- if (!copy_addr) {
202
- copy_addr = tcg_temp_ebb_new_i64();
203
- tcg_gen_extu_tl_i64(copy_addr, orig_addr);
204
- }
205
- plugin_gen_empty_mem_callback(copy_addr, info);
206
- tcg_temp_free_i64(copy_addr);
207
-#endif
208
}
209
#endif
210
}
211
212
-void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
213
+static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
214
+ TCGArg idx, MemOp memop)
215
{
216
MemOp orig_memop;
217
- MemOpIdx oi;
218
+ MemOpIdx orig_oi, oi;
219
TCGv_i64 copy_addr;
220
221
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
222
- memop = tcg_canonicalize_memop(memop, 0, 0);
223
- oi = make_memop_idx(memop, idx);
224
+ orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
225
+ orig_oi = oi = make_memop_idx(memop, idx);
226
227
- orig_memop = memop;
228
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
229
memop &= ~MO_BSWAP;
230
/* The bswap primitive benefits from zero-extended input. */
231
if ((memop & MO_SSIZE) == MO_SW) {
232
memop &= ~MO_SIGN;
233
}
234
+ oi = make_memop_idx(memop, idx);
235
}
236
237
copy_addr = plugin_maybe_preserve_addr(addr);
238
- gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
239
- plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
240
+ gen_ldst(INDEX_op_qemu_ld_i32, tcgv_i32_temp(val), NULL, addr, oi);
241
+ plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
242
243
if ((orig_memop ^ memop) & MO_BSWAP) {
244
switch (orig_memop & MO_SIZE) {
245
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
246
}
247
}
248
249
-void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
250
+void tcg_gen_qemu_ld_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
251
+ MemOp memop, TCGType addr_type)
252
+{
253
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
254
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
255
+ tcg_gen_qemu_ld_i32_int(val, addr, idx, memop);
256
+}
257
+
258
+static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
259
+ TCGArg idx, MemOp memop)
260
{
261
TCGv_i32 swap = NULL;
262
- MemOpIdx oi;
263
+ MemOpIdx orig_oi, oi;
264
+ TCGOpcode opc;
265
266
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
267
memop = tcg_canonicalize_memop(memop, 0, 1);
268
- oi = make_memop_idx(memop, idx);
269
+ orig_oi = oi = make_memop_idx(memop, idx);
270
271
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
272
swap = tcg_temp_ebb_new_i32();
273
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
274
}
275
val = swap;
276
memop &= ~MO_BSWAP;
277
+ oi = make_memop_idx(memop, idx);
278
}
279
280
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
281
- gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
282
+ opc = INDEX_op_qemu_st8_i32;
283
} else {
284
- gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
285
+ opc = INDEX_op_qemu_st_i32;
286
}
287
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
288
+ gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
289
+ plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
290
291
if (swap) {
292
tcg_temp_free_i32(swap);
293
}
294
}
295
296
-void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
297
+void tcg_gen_qemu_st_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
298
+ MemOp memop, TCGType addr_type)
299
+{
300
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
301
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
302
+ tcg_gen_qemu_st_i32_int(val, addr, idx, memop);
303
+}
304
+
305
+static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
306
+ TCGArg idx, MemOp memop)
307
{
308
MemOp orig_memop;
309
- MemOpIdx oi;
310
+ MemOpIdx orig_oi, oi;
311
TCGv_i64 copy_addr;
312
313
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
314
- tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
315
+ tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
316
if (memop & MO_SIGN) {
317
tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
318
} else {
319
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
320
}
321
322
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
323
- memop = tcg_canonicalize_memop(memop, 1, 0);
324
- oi = make_memop_idx(memop, idx);
325
+ orig_memop = memop = tcg_canonicalize_memop(memop, 1, 0);
326
+ orig_oi = oi = make_memop_idx(memop, idx);
327
328
- orig_memop = memop;
329
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
330
memop &= ~MO_BSWAP;
331
/* The bswap primitive benefits from zero-extended input. */
332
if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
333
memop &= ~MO_SIGN;
334
}
335
+ oi = make_memop_idx(memop, idx);
336
}
337
338
copy_addr = plugin_maybe_preserve_addr(addr);
339
- gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
340
- plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
341
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, oi);
342
+ plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
343
344
if ((orig_memop ^ memop) & MO_BSWAP) {
345
int flags = (orig_memop & MO_SIGN
346
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
347
}
348
}
349
350
-void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
351
+void tcg_gen_qemu_ld_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
352
+ MemOp memop, TCGType addr_type)
353
+{
354
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
355
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
356
+ tcg_gen_qemu_ld_i64_int(val, addr, idx, memop);
357
+}
358
+
359
+static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
360
+ TCGArg idx, MemOp memop)
361
{
362
TCGv_i64 swap = NULL;
363
- MemOpIdx oi;
364
+ MemOpIdx orig_oi, oi;
365
366
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
367
- tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
368
+ tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
369
return;
370
}
371
372
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
373
memop = tcg_canonicalize_memop(memop, 1, 1);
374
- oi = make_memop_idx(memop, idx);
375
+ orig_oi = oi = make_memop_idx(memop, idx);
376
377
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
378
swap = tcg_temp_ebb_new_i64();
379
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
380
}
381
val = swap;
382
memop &= ~MO_BSWAP;
383
+ oi = make_memop_idx(memop, idx);
384
}
385
386
- gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
387
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
388
+ gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, oi);
389
+ plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
390
391
if (swap) {
392
tcg_temp_free_i64(swap);
393
}
394
}
395
396
+void tcg_gen_qemu_st_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
397
+ MemOp memop, TCGType addr_type)
398
+{
399
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
400
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
401
+ tcg_gen_qemu_st_i64_int(val, addr, idx, memop);
402
+}
403
+
404
/*
405
* Return true if @mop, without knowledge of the pointer alignment,
406
* does not require 16-byte atomicity, and it would be advantageous
407
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
408
{
409
MemOp mop_1 = orig, mop_2;
410
411
- tcg_debug_assert((orig & MO_SIZE) == MO_128);
412
- tcg_debug_assert((orig & MO_SIGN) == 0);
413
-
414
/* Reduce the size to 64-bit. */
415
mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
416
417
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
418
ret[1] = mop_2;
419
}
420
421
-#if TARGET_LONG_BITS == 64
422
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
423
-#else
424
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
425
-#endif
426
-
427
static TCGv_i64 maybe_extend_addr64(TCGv addr)
428
{
429
#if TARGET_LONG_BITS == 32
430
@@ -XXX,XX +XXX,XX @@ static void maybe_free_addr64(TCGv_i64 a64)
431
#endif
432
}
433
434
-void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
435
+static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
436
+ TCGArg idx, MemOp memop)
437
{
438
- const MemOpIdx oi = make_memop_idx(memop, idx);
439
-
440
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
441
- tcg_debug_assert((memop & MO_SIGN) == 0);
442
+ const MemOpIdx orig_oi = make_memop_idx(memop, idx);
443
+ TCGv_i64 ext_addr = NULL;
444
445
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
446
447
/* TODO: For now, force 32-bit hosts to use the helper. */
448
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
449
TCGv_i64 lo, hi;
450
- TCGArg addr_arg;
451
- MemOpIdx adj_oi;
452
bool need_bswap = false;
453
+ MemOpIdx oi = orig_oi;
454
455
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
456
lo = TCGV128_HIGH(val);
457
hi = TCGV128_LOW(val);
458
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
459
+ oi = make_memop_idx(memop & ~MO_BSWAP, idx);
460
need_bswap = true;
461
} else {
462
lo = TCGV128_LOW(val);
463
hi = TCGV128_HIGH(val);
464
- adj_oi = oi;
465
}
466
467
-#if TARGET_LONG_BITS == 32
468
- addr_arg = tcgv_i32_arg(addr);
469
-#else
470
- addr_arg = tcgv_i64_arg(addr);
471
-#endif
472
- tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
473
+ gen_ldst(INDEX_op_qemu_ld_i128, tcgv_i64_temp(lo),
474
+ tcgv_i64_temp(hi), addr, oi);
475
476
if (need_bswap) {
477
tcg_gen_bswap64_i64(lo, lo);
478
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
479
}
480
} else if (use_two_i64_for_i128(memop)) {
481
MemOp mop[2];
482
- TCGv addr_p8;
483
+ TCGTemp *addr_p8;
484
TCGv_i64 x, y;
485
+ MemOpIdx oi;
486
+ bool need_bswap;
487
488
canonicalize_memop_i128_as_i64(mop, memop);
489
+ need_bswap = (mop[0] ^ memop) & MO_BSWAP;
490
491
/*
492
* Since there are no global TCGv_i128, there is no visible state
493
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
494
y = TCGV128_LOW(val);
495
}
496
497
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
498
+ oi = make_memop_idx(mop[0], idx);
499
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, oi);
500
501
- if ((mop[0] ^ memop) & MO_BSWAP) {
502
+ if (need_bswap) {
503
tcg_gen_bswap64_i64(x, x);
504
}
505
506
- addr_p8 = tcg_temp_ebb_new();
507
- tcg_gen_addi_tl(addr_p8, addr, 8);
508
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
509
- tcg_temp_free(addr_p8);
510
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
511
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
512
+ tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
513
+ addr_p8 = tcgv_i32_temp(t);
514
+ } else {
515
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
516
+ tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
517
+ addr_p8 = tcgv_i64_temp(t);
518
+ }
519
520
- if ((mop[0] ^ memop) & MO_BSWAP) {
521
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, oi);
522
+ tcg_temp_free_internal(addr_p8);
523
+
524
+ if (need_bswap) {
525
tcg_gen_bswap64_i64(y, y);
526
}
527
} else {
528
- TCGv_i64 a64 = maybe_extend_addr64(addr);
529
- gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
530
- maybe_free_addr64(a64);
531
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
532
+ ext_addr = tcg_temp_ebb_new_i64();
533
+ tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
534
+ addr = tcgv_i64_temp(ext_addr);
535
+ }
536
+ gen_helper_ld_i128(val, cpu_env, temp_tcgv_i64(addr),
537
+ tcg_constant_i32(orig_oi));
538
}
539
540
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_R);
541
+ plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
542
}
543
544
-void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
545
+void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
546
+ MemOp memop, TCGType addr_type)
547
{
548
- const MemOpIdx oi = make_memop_idx(memop, idx);
549
-
550
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
551
tcg_debug_assert((memop & MO_SIZE) == MO_128);
552
tcg_debug_assert((memop & MO_SIGN) == 0);
553
+ tcg_gen_qemu_ld_i128_int(val, addr, idx, memop);
554
+}
555
+
556
+static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
557
+ TCGArg idx, MemOp memop)
558
+{
559
+ const MemOpIdx orig_oi = make_memop_idx(memop, idx);
560
+ TCGv_i64 ext_addr = NULL;
561
562
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
563
564
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
565
566
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
567
TCGv_i64 lo, hi;
568
- TCGArg addr_arg;
569
- MemOpIdx adj_oi;
570
+ MemOpIdx oi = orig_oi;
571
bool need_bswap = false;
572
573
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
574
- lo = tcg_temp_new_i64();
575
- hi = tcg_temp_new_i64();
576
+ lo = tcg_temp_ebb_new_i64();
577
+ hi = tcg_temp_ebb_new_i64();
578
tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
579
tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
580
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
581
+ oi = make_memop_idx(memop & ~MO_BSWAP, idx);
582
need_bswap = true;
583
} else {
584
lo = TCGV128_LOW(val);
585
hi = TCGV128_HIGH(val);
586
- adj_oi = oi;
587
}
588
589
-#if TARGET_LONG_BITS == 32
590
- addr_arg = tcgv_i32_arg(addr);
591
-#else
592
- addr_arg = tcgv_i64_arg(addr);
593
-#endif
594
- tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
595
+ gen_ldst(INDEX_op_qemu_st_i128, tcgv_i64_temp(lo),
596
+ tcgv_i64_temp(hi), addr, oi);
597
598
if (need_bswap) {
599
tcg_temp_free_i64(lo);
600
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
601
}
602
} else if (use_two_i64_for_i128(memop)) {
603
MemOp mop[2];
604
- TCGv addr_p8;
605
- TCGv_i64 x, y;
606
+ TCGTemp *addr_p8;
607
+ TCGv_i64 x, y, b = NULL;
608
609
canonicalize_memop_i128_as_i64(mop, memop);
610
611
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
612
y = TCGV128_LOW(val);
613
}
614
615
- addr_p8 = tcg_temp_ebb_new();
616
if ((mop[0] ^ memop) & MO_BSWAP) {
617
- TCGv_i64 t = tcg_temp_ebb_new_i64();
618
-
619
- tcg_gen_bswap64_i64(t, x);
620
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
621
- tcg_gen_bswap64_i64(t, y);
622
- tcg_gen_addi_tl(addr_p8, addr, 8);
623
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
624
- tcg_temp_free_i64(t);
625
- } else {
626
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
627
- tcg_gen_addi_tl(addr_p8, addr, 8);
628
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
629
+ b = tcg_temp_ebb_new_i64();
630
+ tcg_gen_bswap64_i64(b, x);
631
+ x = b;
632
}
633
- tcg_temp_free(addr_p8);
634
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr,
635
+ make_memop_idx(mop[0], idx));
636
+
637
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
638
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
639
+ tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
640
+ addr_p8 = tcgv_i32_temp(t);
641
+ } else {
642
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
643
+ tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
644
+ addr_p8 = tcgv_i64_temp(t);
645
+ }
646
+
647
+ if (b) {
648
+ tcg_gen_bswap64_i64(b, y);
649
+ y = b;
650
+ }
651
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8,
652
+ make_memop_idx(mop[1], idx));
653
+
654
+ if (b) {
655
+ tcg_temp_free_i64(b);
656
+ }
657
+ tcg_temp_free_internal(addr_p8);
658
} else {
659
- TCGv_i64 a64 = maybe_extend_addr64(addr);
660
- gen_helper_st_i128(cpu_env, a64, val, tcg_constant_i32(oi));
661
- maybe_free_addr64(a64);
662
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
663
+ ext_addr = tcg_temp_ebb_new_i64();
664
+ tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
665
+ addr = tcgv_i64_temp(ext_addr);
666
+ }
667
+ gen_helper_st_i128(cpu_env, temp_tcgv_i64(addr), val,
668
+ tcg_constant_i32(orig_oi));
669
}
670
671
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
672
+ plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_W);
673
+}
674
+
675
+void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
676
+ MemOp memop, TCGType addr_type)
677
+{
678
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
679
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
680
+ tcg_debug_assert((memop & MO_SIGN) == 0);
681
+ tcg_gen_qemu_st_i128_int(val, addr, idx, memop);
682
}
683
684
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
685
--
686
2.34.1
687
688
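Call sites in target translators are unchanged; the new static inlines do the
TCGv to TCGTemp conversion and pass the compile-time address type so that the
_chk entry points can assert it against tcg_ctx->addr_type. Roughly, with
val/addr/mmu_idx standing in for a target's operands:

    /* What a target writes (unchanged): */
    tcg_gen_qemu_ld_i64(val, addr, mmu_idx, MO_TEUQ);

    /* What the inline added above turns it into: */
    tcg_gen_qemu_ld_i64_chk(val, tcgv_tl_temp(addr), mmu_idx,
                            MO_TEUQ, TCG_TYPE_TL);

    /* Which then validates before emitting the op: */
    tcg_debug_assert(TCG_TYPE_TL == tcg_ctx->addr_type);
    tcg_debug_assert((MO_TEUQ & MO_SIZE) <= MO_64);
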
1
There are no users of this function outside cputlb.c,
1
Expand from TCGv to TCGTemp inline in the translators,
2
and its interface will change in the next patch.
2
and validate that the size matches tcg_ctx->addr_type.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
include/exec/cpu_ldst.h | 5 -----
7
include/tcg/tcg-op.h | 184 ++++++++++++++++++++++++++++++----------
10
accel/tcg/cputlb.c | 5 +++++
8
tcg/tcg-op-ldst.c | 198 ++++++++++++++++++++++++++++---------------
11
2 files changed, 5 insertions(+), 5 deletions(-)
9
2 files changed, 267 insertions(+), 115 deletions(-)
12
10
13
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
11
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
14
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
15
--- a/include/exec/cpu_ldst.h
13
--- a/include/tcg/tcg-op.h
16
+++ b/include/exec/cpu_ldst.h
14
+++ b/include/tcg/tcg-op.h
17
@@ -XXX,XX +XXX,XX @@ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
15
@@ -XXX,XX +XXX,XX @@ tcg_gen_qemu_st_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
18
return (addr >> TARGET_PAGE_BITS) & size_mask;
16
tcg_gen_qemu_st_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
19
}
17
}
20
18
21
-static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
19
-void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
22
-{
20
- TCGArg, MemOp);
23
- return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
21
-void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
24
-}
22
- TCGArg, MemOp);
23
-void tcg_gen_atomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
24
- TCGArg, MemOp);
25
+void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGv_i32,
26
+ TCGArg, MemOp, TCGType);
27
+void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGv_i64,
28
+ TCGArg, MemOp, TCGType);
29
+void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128,
30
+ TCGv_i128, TCGArg, MemOp, TCGType);
31
32
-void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
33
- TCGArg, MemOp);
34
-void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
35
- TCGArg, MemOp);
36
-void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
37
- TCGArg, MemOp);
38
+void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGv_i32,
39
+ TCGArg, MemOp, TCGType);
40
+void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGv_i64,
41
+ TCGArg, MemOp, TCGType);
42
+void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128,
43
+ TCGv_i128, TCGArg, MemOp, TCGType);
44
45
-void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
46
-void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
47
+void tcg_gen_atomic_xchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
48
+ TCGArg, MemOp, TCGType);
49
+void tcg_gen_atomic_xchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
50
+ TCGArg, MemOp, TCGType);
51
52
-void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
53
-void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
54
-void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
55
-void tcg_gen_atomic_fetch_and_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
56
-void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
57
-void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
58
-void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
59
-void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
60
-void tcg_gen_atomic_fetch_smin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
61
-void tcg_gen_atomic_fetch_smin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
62
-void tcg_gen_atomic_fetch_umin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
63
-void tcg_gen_atomic_fetch_umin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
64
-void tcg_gen_atomic_fetch_smax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
65
-void tcg_gen_atomic_fetch_smax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
66
-void tcg_gen_atomic_fetch_umax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
67
-void tcg_gen_atomic_fetch_umax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
68
+void tcg_gen_atomic_fetch_add_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
69
+ TCGArg, MemOp, TCGType);
70
+void tcg_gen_atomic_fetch_add_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
71
+ TCGArg, MemOp, TCGType);
72
+void tcg_gen_atomic_fetch_and_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
73
+ TCGArg, MemOp, TCGType);
74
+void tcg_gen_atomic_fetch_and_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
75
+ TCGArg, MemOp, TCGType);
76
+void tcg_gen_atomic_fetch_or_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
77
+ TCGArg, MemOp, TCGType);
78
+void tcg_gen_atomic_fetch_or_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
79
+ TCGArg, MemOp, TCGType);
80
+void tcg_gen_atomic_fetch_xor_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
81
+ TCGArg, MemOp, TCGType);
82
+void tcg_gen_atomic_fetch_xor_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
83
+ TCGArg, MemOp, TCGType);
84
+void tcg_gen_atomic_fetch_smin_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
85
+ TCGArg, MemOp, TCGType);
86
+void tcg_gen_atomic_fetch_smin_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
87
+ TCGArg, MemOp, TCGType);
88
+void tcg_gen_atomic_fetch_umin_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
89
+ TCGArg, MemOp, TCGType);
90
+void tcg_gen_atomic_fetch_umin_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
91
+ TCGArg, MemOp, TCGType);
92
+void tcg_gen_atomic_fetch_smax_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
93
+ TCGArg, MemOp, TCGType);
94
+void tcg_gen_atomic_fetch_smax_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
95
+ TCGArg, MemOp, TCGType);
96
+void tcg_gen_atomic_fetch_umax_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
97
+ TCGArg, MemOp, TCGType);
98
+void tcg_gen_atomic_fetch_umax_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
99
+ TCGArg, MemOp, TCGType);
100
101
-void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
102
-void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
103
-void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
104
-void tcg_gen_atomic_and_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
105
-void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
106
-void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
107
-void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
108
-void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
109
-void tcg_gen_atomic_smin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
110
-void tcg_gen_atomic_smin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
111
-void tcg_gen_atomic_umin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
112
-void tcg_gen_atomic_umin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
113
-void tcg_gen_atomic_smax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
114
-void tcg_gen_atomic_smax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
115
-void tcg_gen_atomic_umax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
116
-void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
117
+void tcg_gen_atomic_add_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
118
+ TCGArg, MemOp, TCGType);
119
+void tcg_gen_atomic_add_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
120
+ TCGArg, MemOp, TCGType);
121
+void tcg_gen_atomic_and_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
122
+ TCGArg, MemOp, TCGType);
123
+void tcg_gen_atomic_and_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
124
+ TCGArg, MemOp, TCGType);
125
+void tcg_gen_atomic_or_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
126
+ TCGArg, MemOp, TCGType);
127
+void tcg_gen_atomic_or_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
128
+ TCGArg, MemOp, TCGType);
129
+void tcg_gen_atomic_xor_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
130
+ TCGArg, MemOp, TCGType);
131
+void tcg_gen_atomic_xor_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
132
+ TCGArg, MemOp, TCGType);
133
+void tcg_gen_atomic_smin_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
134
+ TCGArg, MemOp, TCGType);
135
+void tcg_gen_atomic_smin_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
136
+ TCGArg, MemOp, TCGType);
137
+void tcg_gen_atomic_umin_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
138
+ TCGArg, MemOp, TCGType);
139
+void tcg_gen_atomic_umin_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
140
+ TCGArg, MemOp, TCGType);
141
+void tcg_gen_atomic_smax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
142
+ TCGArg, MemOp, TCGType);
143
+void tcg_gen_atomic_smax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
144
+ TCGArg, MemOp, TCGType);
145
+void tcg_gen_atomic_umax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
146
+ TCGArg, MemOp, TCGType);
147
+void tcg_gen_atomic_umax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
148
+ TCGArg, MemOp, TCGType);
149
+
150
+#define DEF_ATOMIC2(N, S) \
151
+ static inline void N##_##S(TCGv_##S r, TCGv a, TCGv_##S v, \
152
+ TCGArg i, MemOp m) \
153
+ { N##_##S##_chk(r, tcgv_tl_temp(a), v, i, m, TCG_TYPE_TL); }
154
+
155
+#define DEF_ATOMIC3(N, S) \
156
+ static inline void N##_##S(TCGv_##S r, TCGv a, TCGv_##S o, \
157
+ TCGv_##S n, TCGArg i, MemOp m) \
158
+ { N##_##S##_chk(r, tcgv_tl_temp(a), o, n, i, m, TCG_TYPE_TL); }
159
+
160
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i32)
161
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i64)
162
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i128)
163
+
164
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i32)
165
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i64)
166
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i128)
167
+
168
+DEF_ATOMIC2(tcg_gen_atomic_xchg, i32)
169
+DEF_ATOMIC2(tcg_gen_atomic_xchg, i64)
170
+
171
+DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i32)
172
+DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i64)
173
+DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i32)
174
+DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i64)
175
+DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i32)
176
+DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i64)
177
+DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i32)
178
+DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i64)
179
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i32)
180
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i64)
181
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umin, i32)
182
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umin, i64)
183
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smax, i32)
184
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smax, i64)
185
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umax, i32)
186
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umax, i64)
187
+
188
+DEF_ATOMIC2(tcg_gen_atomic_add_fetch, i32)
189
+DEF_ATOMIC2(tcg_gen_atomic_add_fetch, i64)
190
+DEF_ATOMIC2(tcg_gen_atomic_and_fetch, i32)
191
+DEF_ATOMIC2(tcg_gen_atomic_and_fetch, i64)
192
+DEF_ATOMIC2(tcg_gen_atomic_or_fetch, i32)
193
+DEF_ATOMIC2(tcg_gen_atomic_or_fetch, i64)
194
+DEF_ATOMIC2(tcg_gen_atomic_xor_fetch, i32)
195
+DEF_ATOMIC2(tcg_gen_atomic_xor_fetch, i64)
196
+DEF_ATOMIC2(tcg_gen_atomic_smin_fetch, i32)
197
+DEF_ATOMIC2(tcg_gen_atomic_smin_fetch, i64)
198
+DEF_ATOMIC2(tcg_gen_atomic_umin_fetch, i32)
199
+DEF_ATOMIC2(tcg_gen_atomic_umin_fetch, i64)
200
+DEF_ATOMIC2(tcg_gen_atomic_smax_fetch, i32)
201
+DEF_ATOMIC2(tcg_gen_atomic_smax_fetch, i64)
202
+DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i32)
203
+DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
204
+
205
+#undef DEF_ATOMIC2
206
+#undef DEF_ATOMIC3
207
208
void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
209
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
210
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
211
index XXXXXXX..XXXXXXX 100644
212
--- a/tcg/tcg-op-ldst.c
213
+++ b/tcg/tcg-op-ldst.c
214
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
215
ret[1] = mop_2;
216
}
217
218
-static TCGv_i64 maybe_extend_addr64(TCGv addr)
219
+static TCGv_i64 maybe_extend_addr64(TCGTemp *addr)
220
{
221
-#if TARGET_LONG_BITS == 32
222
- TCGv_i64 a64 = tcg_temp_ebb_new_i64();
223
- tcg_gen_extu_i32_i64(a64, addr);
224
- return a64;
225
-#else
226
- return addr;
227
-#endif
228
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
229
+ TCGv_i64 a64 = tcg_temp_ebb_new_i64();
230
+ tcg_gen_extu_i32_i64(a64, temp_tcgv_i32(addr));
231
+ return a64;
232
+ }
233
+ return temp_tcgv_i64(addr);
234
}
235
236
static void maybe_free_addr64(TCGv_i64 a64)
237
{
238
-#if TARGET_LONG_BITS == 32
239
- tcg_temp_free_i64(a64);
240
-#endif
241
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
242
+ tcg_temp_free_i64(a64);
243
+ }
244
}
245
246
static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
247
@@ -XXX,XX +XXX,XX @@ static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
248
WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
249
};
250
251
-void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
252
- TCGv_i32 newv, TCGArg idx, MemOp memop)
253
+static void tcg_gen_nonatomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
254
+ TCGv_i32 cmpv, TCGv_i32 newv,
255
+ TCGArg idx, MemOp memop)
256
{
257
TCGv_i32 t1 = tcg_temp_ebb_new_i32();
258
TCGv_i32 t2 = tcg_temp_ebb_new_i32();
259
260
tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
261
262
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
263
+ tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop & ~MO_SIGN);
264
tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
265
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
266
+ tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
267
tcg_temp_free_i32(t2);
268
269
if (memop & MO_SIGN) {
270
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
271
tcg_temp_free_i32(t1);
272
}
273
274
-void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
275
- TCGv_i32 newv, TCGArg idx, MemOp memop)
276
+void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
277
+ TCGv_i32 cmpv, TCGv_i32 newv,
278
+ TCGArg idx, MemOp memop,
279
+ TCGType addr_type)
280
+{
281
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
282
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
283
+ tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
284
+}
285
+
286
+static void tcg_gen_atomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
287
+ TCGv_i32 cmpv, TCGv_i32 newv,
288
+ TCGArg idx, MemOp memop)
289
{
290
gen_atomic_cx_i32 gen;
291
TCGv_i64 a64;
292
MemOpIdx oi;
293
294
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
295
- tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
296
+ tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
297
return;
298
}
299
300
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
301
}
302
}
303
304
-void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
305
- TCGv_i64 newv, TCGArg idx, MemOp memop)
306
+void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
307
+ TCGv_i32 cmpv, TCGv_i32 newv,
308
+ TCGArg idx, MemOp memop,
309
+ TCGType addr_type)
310
+{
311
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
312
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
313
+ tcg_gen_atomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
314
+}
315
+
316
+static void tcg_gen_nonatomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
317
+ TCGv_i64 cmpv, TCGv_i64 newv,
318
+ TCGArg idx, MemOp memop)
319
{
320
TCGv_i64 t1, t2;
321
322
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
323
- tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
324
- TCGV_LOW(newv), idx, memop);
325
+ tcg_gen_nonatomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
326
+ TCGV_LOW(newv), idx, memop);
327
if (memop & MO_SIGN) {
328
tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
329
} else {
330
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
331
332
tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
333
334
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
335
+ tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop & ~MO_SIGN);
336
tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
337
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
338
+ tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
339
tcg_temp_free_i64(t2);
340
341
if (memop & MO_SIGN) {
342
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
343
tcg_temp_free_i64(t1);
344
}
345
346
-void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
347
- TCGv_i64 newv, TCGArg idx, MemOp memop)
348
+void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
349
+ TCGv_i64 cmpv, TCGv_i64 newv,
350
+ TCGArg idx, MemOp memop,
351
+ TCGType addr_type)
352
+{
353
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
354
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
355
+ tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
356
+}
357
+
358
+static void tcg_gen_atomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
359
+ TCGv_i64 cmpv, TCGv_i64 newv,
360
+ TCGArg idx, MemOp memop)
361
{
362
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
363
- tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
364
+ tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
365
return;
366
}
367
368
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
369
}
370
371
if (TCG_TARGET_REG_BITS == 32) {
372
- tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
373
- TCGV_LOW(newv), idx, memop);
374
+ tcg_gen_atomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
375
+ TCGV_LOW(newv), idx, memop);
376
if (memop & MO_SIGN) {
377
tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
378
} else {
379
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
380
381
tcg_gen_extrl_i64_i32(c32, cmpv);
382
tcg_gen_extrl_i64_i32(n32, newv);
383
- tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
384
+ tcg_gen_atomic_cmpxchg_i32_int(r32, addr, c32, n32,
385
+ idx, memop & ~MO_SIGN);
386
tcg_temp_free_i32(c32);
387
tcg_temp_free_i32(n32);
388
389
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
390
}
391
}
392
393
-void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
394
- TCGv_i128 newv, TCGArg idx, MemOp memop)
395
+void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
396
+ TCGv_i64 cmpv, TCGv_i64 newv,
397
+ TCGArg idx, MemOp memop, TCGType addr_type)
398
+{
399
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
400
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
401
+ tcg_gen_atomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
402
+}
403
+
404
+static void tcg_gen_nonatomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
405
+ TCGv_i128 cmpv, TCGv_i128 newv,
406
+ TCGArg idx, MemOp memop)
407
{
408
if (TCG_TARGET_REG_BITS == 32) {
409
/* Inline expansion below is simply too large for 32-bit hosts. */
410
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
411
? gen_helper_nonatomic_cmpxchgo_le
412
: gen_helper_nonatomic_cmpxchgo_be);
413
MemOpIdx oi = make_memop_idx(memop, idx);
414
- TCGv_i64 a64;
415
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
416
417
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
418
- tcg_debug_assert((memop & MO_SIGN) == 0);
25
-
419
-
26
/* Find the TLB entry corresponding to the mmu_idx + address pair. */
420
- a64 = maybe_extend_addr64(addr);
27
static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
421
gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
28
target_ulong addr)
422
maybe_free_addr64(a64);
29
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
423
} else {
30
index XXXXXXX..XXXXXXX 100644
424
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
31
--- a/accel/tcg/cputlb.c
425
TCGv_i64 t1 = tcg_temp_ebb_new_i64();
32
+++ b/accel/tcg/cputlb.c
426
TCGv_i64 z = tcg_constant_i64(0);
33
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
427
34
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
428
- tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
35
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
429
+ tcg_gen_qemu_ld_i128_int(oldv, addr, idx, memop);
36
430
37
+static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
431
/* Compare i128 */
432
tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
433
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
434
TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
435
436
/* Unconditional writeback. */
437
- tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
438
+ tcg_gen_qemu_st_i128_int(tmpv, addr, idx, memop);
439
tcg_gen_mov_i128(retv, oldv);
440
441
tcg_temp_free_i64(t0);
442
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
443
}
444
}
445
446
-void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
447
- TCGv_i128 newv, TCGArg idx, MemOp memop)
448
+void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
449
+ TCGv_i128 cmpv, TCGv_i128 newv,
450
+ TCGArg idx, MemOp memop,
451
+ TCGType addr_type)
38
+{
452
+{
39
+ return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
453
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
454
+ tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
455
+ tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
40
+}
456
+}
41
+
457
+
42
static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
458
+static void tcg_gen_atomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
43
{
459
+ TCGv_i128 cmpv, TCGv_i128 newv,
44
return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
460
+ TCGArg idx, MemOp memop)
461
{
462
gen_atomic_cx_i128 gen;
463
464
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
465
- tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
466
+ tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
467
return;
468
}
469
470
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
471
- tcg_debug_assert((memop & MO_SIGN) == 0);
472
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
473
-
474
if (gen) {
475
MemOpIdx oi = make_memop_idx(memop, idx);
476
TCGv_i64 a64 = maybe_extend_addr64(addr);
477
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
478
tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
479
}
480
481
-static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
482
+void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
483
+ TCGv_i128 cmpv, TCGv_i128 newv,
484
+ TCGArg idx, MemOp memop,
485
+ TCGType addr_type)
486
+{
487
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
488
+ tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
489
+ tcg_gen_atomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
490
+}
491
+
492
+static void do_nonatomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
493
TCGArg idx, MemOp memop, bool new_val,
494
void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
495
{
496
@@ -XXX,XX +XXX,XX @@ static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
497
498
memop = tcg_canonicalize_memop(memop, 0, 0);
499
500
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
501
+ tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop);
502
tcg_gen_ext_i32(t2, val, memop);
503
gen(t2, t1, t2);
504
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
505
+ tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
506
507
tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
508
tcg_temp_free_i32(t1);
509
tcg_temp_free_i32(t2);
510
}
511
512
-static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
513
+static void do_atomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
514
TCGArg idx, MemOp memop, void * const table[])
515
{
516
gen_atomic_op_i32 gen;
517
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
518
}
519
}
520
521
-static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
522
+static void do_nonatomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
523
TCGArg idx, MemOp memop, bool new_val,
524
void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
525
{
526
@@ -XXX,XX +XXX,XX @@ static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
527
528
memop = tcg_canonicalize_memop(memop, 1, 0);
529
530
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
531
+ tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop);
532
tcg_gen_ext_i64(t2, val, memop);
533
gen(t2, t1, t2);
534
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
535
+ tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
536
537
tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
538
tcg_temp_free_i64(t1);
539
tcg_temp_free_i64(t2);
540
}
541
542
-static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
543
+static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
544
TCGArg idx, MemOp memop, void * const table[])
545
{
546
memop = tcg_canonicalize_memop(memop, 1, 0);
547
548
if ((memop & MO_SIZE) == MO_64) {
549
-#ifdef CONFIG_ATOMIC64
550
- gen_atomic_op_i64 gen;
551
- TCGv_i64 a64;
552
- MemOpIdx oi;
553
+ gen_atomic_op_i64 gen = table[memop & (MO_SIZE | MO_BSWAP)];
554
555
- gen = table[memop & (MO_SIZE | MO_BSWAP)];
556
- tcg_debug_assert(gen != NULL);
557
+ if (gen) {
558
+ MemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
559
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
560
+ gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
561
+ maybe_free_addr64(a64);
562
+ return;
563
+ }
564
565
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
566
- a64 = maybe_extend_addr64(addr);
567
- gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
568
- maybe_free_addr64(a64);
569
-#else
570
gen_helper_exit_atomic(cpu_env);
571
/* Produce a result, so that we have a well-formed opcode stream
572
with respect to uses of the result in the (dead) code following. */
573
tcg_gen_movi_i64(ret, 0);
574
-#endif /* CONFIG_ATOMIC64 */
575
} else {
576
TCGv_i32 v32 = tcg_temp_ebb_new_i32();
577
TCGv_i32 r32 = tcg_temp_ebb_new_i32();
578
@@ -XXX,XX +XXX,XX @@ static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
579
WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
580
WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
581
}; \
582
-void tcg_gen_atomic_##NAME##_i32 \
583
- (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
584
+void tcg_gen_atomic_##NAME##_i32_chk(TCGv_i32 ret, TCGTemp *addr, \
585
+ TCGv_i32 val, TCGArg idx, \
586
+ MemOp memop, TCGType addr_type) \
587
{ \
588
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type); \
589
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32); \
590
if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
591
do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
592
} else { \
593
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_##NAME##_i32 \
594
tcg_gen_##OP##_i32); \
595
} \
596
} \
597
-void tcg_gen_atomic_##NAME##_i64 \
598
- (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
599
+void tcg_gen_atomic_##NAME##_i64_chk(TCGv_i64 ret, TCGTemp *addr, \
600
+ TCGv_i64 val, TCGArg idx, \
601
+ MemOp memop, TCGType addr_type) \
602
{ \
603
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type); \
604
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64); \
605
if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
606
do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
607
} else { \
45
--
608
--
46
2.20.1
609
2.34.1
47
610
48
611
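The DEF_ATOMIC2/DEF_ATOMIC3 macros keep the existing TCGv-based names as thin
inlines over the new TCGTemp-based _chk entry points, so target code again
needs no changes. For instance, DEF_ATOMIC2(tcg_gen_atomic_xchg, i32) expands
to (reformatted for readability):

    static inline void tcg_gen_atomic_xchg_i32(TCGv_i32 r, TCGv a, TCGv_i32 v,
                                               TCGArg i, MemOp m)
    {
        tcg_gen_atomic_xchg_i32_chk(r, tcgv_tl_temp(a), v, i, m, TCG_TYPE_TL);
    }
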
New patch
1
For 32-bit hosts, we cannot simply rely on TCGContext.addr_bits,
2
as we need one or two host registers to represent the guest address.
1
3
4
Create the new opcodes and update all users. Since we have not
5
yet eliminated TARGET_LONG_BITS, only one of the two opcodes will
6
ever be used, so we can get away with treating them the same in
7
the backends.
8
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
include/tcg/tcg-opc.h | 35 ++++++++----
13
tcg/optimize.c | 19 +++++--
14
tcg/tcg-op-ldst.c | 83 ++++++++++++++++++++++-------
15
tcg/tcg.c | 42 ++++++++++-----
16
tcg/tci.c | 32 +++++++----
17
tcg/aarch64/tcg-target.c.inc | 36 ++++++++-----
18
tcg/arm/tcg-target.c.inc | 83 +++++++++++++++--------------
19
tcg/i386/tcg-target.c.inc | 91 ++++++++++++++++++++------------
20
tcg/loongarch64/tcg-target.c.inc | 24 ++++++---
21
tcg/mips/tcg-target.c.inc | 66 ++++++++++++++---------
22
tcg/ppc/tcg-target.c.inc | 91 +++++++++++++++++++-------------
23
tcg/riscv/tcg-target.c.inc | 24 ++++++---
24
tcg/s390x/tcg-target.c.inc | 36 ++++++++-----
25
tcg/sparc64/tcg-target.c.inc | 24 ++++++---
26
tcg/tci/tcg-target.c.inc | 44 ++++++++-------
27
15 files changed, 468 insertions(+), 262 deletions(-)
28
29
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/include/tcg/tcg-opc.h
32
+++ b/include/tcg/tcg-opc.h
33
@@ -XXX,XX +XXX,XX @@ DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
34
DEF(muluh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muluh_i64))
35
DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
36
37
-#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
38
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
39
40
/* QEMU specific */
41
@@ -XXX,XX +XXX,XX @@ DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
42
DEF(plugin_cb_start, 0, 0, 3, TCG_OPF_NOT_PRESENT)
43
DEF(plugin_cb_end, 0, 0, 0, TCG_OPF_NOT_PRESENT)
44
45
-DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
46
+/* Replicate ld/st ops for 32 and 64-bit guest addresses. */
47
+DEF(qemu_ld_a32_i32, 1, 1, 1,
48
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
49
-DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1,
50
+DEF(qemu_st_a32_i32, 0, 1 + 1, 1,
51
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
52
-DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
53
+DEF(qemu_ld_a32_i64, DATA64_ARGS, 1, 1,
54
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
55
-DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
56
+DEF(qemu_st_a32_i64, 0, DATA64_ARGS + 1, 1,
57
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
58
+
59
+DEF(qemu_ld_a64_i32, 1, DATA64_ARGS, 1,
60
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
61
+DEF(qemu_st_a64_i32, 0, 1 + DATA64_ARGS, 1,
62
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
63
+DEF(qemu_ld_a64_i64, DATA64_ARGS, DATA64_ARGS, 1,
64
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
65
+DEF(qemu_st_a64_i64, 0, DATA64_ARGS + DATA64_ARGS, 1,
66
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
67
68
/* Only used by i386 to cope with stupid register constraints. */
69
-DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
70
+DEF(qemu_st8_a32_i32, 0, 1 + 1, 1,
71
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
72
+ IMPL(TCG_TARGET_HAS_qemu_st8_i32))
73
+DEF(qemu_st8_a64_i32, 0, 1 + DATA64_ARGS, 1,
74
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
75
IMPL(TCG_TARGET_HAS_qemu_st8_i32))
76
77
/* Only for 64-bit hosts at the moment. */
78
-DEF(qemu_ld_i128, 2, 1, 1,
79
+DEF(qemu_ld_a32_i128, 2, 1, 1,
80
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
81
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
82
-DEF(qemu_st_i128, 0, 3, 1,
83
+DEF(qemu_ld_a64_i128, 2, 1, 1,
84
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
85
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
86
+DEF(qemu_st_a32_i128, 0, 3, 1,
87
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
88
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
89
+DEF(qemu_st_a64_i128, 0, 3, 1,
90
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
91
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
92
93
@@ -XXX,XX +XXX,XX @@ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
94
DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
95
#endif
96
97
-#undef TLADDR_ARGS
98
#undef DATA64_ARGS
99
#undef IMPL
100
#undef IMPL64
101
diff --git a/tcg/optimize.c b/tcg/optimize.c
102
index XXXXXXX..XXXXXXX 100644
103
--- a/tcg/optimize.c
104
+++ b/tcg/optimize.c
105
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
106
CASE_OP_32_64_VEC(orc):
107
done = fold_orc(&ctx, op);
108
break;
109
- case INDEX_op_qemu_ld_i32:
110
- case INDEX_op_qemu_ld_i64:
111
+ case INDEX_op_qemu_ld_a32_i32:
112
+ case INDEX_op_qemu_ld_a64_i32:
113
+ case INDEX_op_qemu_ld_a32_i64:
114
+ case INDEX_op_qemu_ld_a64_i64:
115
+ case INDEX_op_qemu_ld_a32_i128:
116
+ case INDEX_op_qemu_ld_a64_i128:
117
done = fold_qemu_ld(&ctx, op);
118
break;
119
- case INDEX_op_qemu_st_i32:
120
- case INDEX_op_qemu_st8_i32:
121
- case INDEX_op_qemu_st_i64:
122
+ case INDEX_op_qemu_st8_a32_i32:
123
+ case INDEX_op_qemu_st8_a64_i32:
124
+ case INDEX_op_qemu_st_a32_i32:
125
+ case INDEX_op_qemu_st_a64_i32:
126
+ case INDEX_op_qemu_st_a32_i64:
127
+ case INDEX_op_qemu_st_a64_i64:
128
+ case INDEX_op_qemu_st_a32_i128:
129
+ case INDEX_op_qemu_st_a64_i128:
130
done = fold_qemu_st(&ctx, op);
131
break;
132
CASE_OP_32_64(rem):
133
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/tcg/tcg-op-ldst.c
136
+++ b/tcg/tcg-op-ldst.c
137
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
138
MemOp orig_memop;
139
MemOpIdx orig_oi, oi;
140
TCGv_i64 copy_addr;
141
+ TCGOpcode opc;
142
143
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
144
orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
145
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
146
}
147
148
copy_addr = plugin_maybe_preserve_addr(addr);
149
- gen_ldst(INDEX_op_qemu_ld_i32, tcgv_i32_temp(val), NULL, addr, oi);
150
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
151
+ opc = INDEX_op_qemu_ld_a32_i32;
152
+ } else {
153
+ opc = INDEX_op_qemu_ld_a64_i32;
154
+ }
155
+ gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
156
plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
157
158
if ((orig_memop ^ memop) & MO_BSWAP) {
159
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
160
}
161
162
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
163
- opc = INDEX_op_qemu_st8_i32;
164
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
165
+ opc = INDEX_op_qemu_st8_a32_i32;
166
+ } else {
167
+ opc = INDEX_op_qemu_st8_a64_i32;
168
+ }
169
} else {
170
- opc = INDEX_op_qemu_st_i32;
171
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
172
+ opc = INDEX_op_qemu_st_a32_i32;
173
+ } else {
174
+ opc = INDEX_op_qemu_st_a64_i32;
175
+ }
176
}
177
gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
178
plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
179
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
180
MemOp orig_memop;
181
MemOpIdx orig_oi, oi;
182
TCGv_i64 copy_addr;
183
+ TCGOpcode opc;
184
185
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
186
tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
187
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
188
}
189
190
copy_addr = plugin_maybe_preserve_addr(addr);
191
- gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, oi);
192
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
193
+ opc = INDEX_op_qemu_ld_a32_i64;
194
+ } else {
195
+ opc = INDEX_op_qemu_ld_a64_i64;
196
+ }
197
+ gen_ldst_i64(opc, val, addr, oi);
198
plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
199
200
if ((orig_memop ^ memop) & MO_BSWAP) {
201
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
202
{
203
TCGv_i64 swap = NULL;
204
MemOpIdx orig_oi, oi;
205
+ TCGOpcode opc;
206
207
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
208
tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
209
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
210
oi = make_memop_idx(memop, idx);
211
}
212
213
- gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, oi);
214
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
215
+ opc = INDEX_op_qemu_st_a32_i64;
216
+ } else {
217
+ opc = INDEX_op_qemu_st_a64_i64;
218
+ }
219
+ gen_ldst_i64(opc, val, addr, oi);
220
plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
221
222
if (swap) {
223
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
224
{
225
const MemOpIdx orig_oi = make_memop_idx(memop, idx);
226
TCGv_i64 ext_addr = NULL;
227
+ TCGOpcode opc;
228
229
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
230
231
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
232
hi = TCGV128_HIGH(val);
233
}
234
235
- gen_ldst(INDEX_op_qemu_ld_i128, tcgv_i64_temp(lo),
236
- tcgv_i64_temp(hi), addr, oi);
237
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
238
+ opc = INDEX_op_qemu_ld_a32_i128;
239
+ } else {
240
+ opc = INDEX_op_qemu_ld_a64_i128;
241
+ }
242
+ gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
243
244
if (need_bswap) {
245
tcg_gen_bswap64_i64(lo, lo);
246
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
247
canonicalize_memop_i128_as_i64(mop, memop);
248
need_bswap = (mop[0] ^ memop) & MO_BSWAP;
249
250
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
251
+ opc = INDEX_op_qemu_ld_a32_i64;
252
+ } else {
253
+ opc = INDEX_op_qemu_ld_a64_i64;
254
+ }
255
+
256
/*
257
* Since there are no global TCGv_i128, there is no visible state
258
* changed if the second load faults. Load directly into the two
259
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
260
}
261
262
oi = make_memop_idx(mop[0], idx);
263
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, oi);
264
+ gen_ldst_i64(opc, x, addr, oi);
265
266
if (need_bswap) {
267
tcg_gen_bswap64_i64(x, x);
268
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
269
addr_p8 = tcgv_i64_temp(t);
270
}
271
272
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, oi);
273
+ gen_ldst_i64(opc, y, addr_p8, oi);
274
tcg_temp_free_internal(addr_p8);
275
276
if (need_bswap) {
277
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
278
{
279
const MemOpIdx orig_oi = make_memop_idx(memop, idx);
280
TCGv_i64 ext_addr = NULL;
281
+ TCGOpcode opc;
282
283
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
284
285
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
286
hi = TCGV128_HIGH(val);
287
}
288
289
- gen_ldst(INDEX_op_qemu_st_i128, tcgv_i64_temp(lo),
290
- tcgv_i64_temp(hi), addr, oi);
291
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
292
+ opc = INDEX_op_qemu_st_a32_i128;
293
+ } else {
294
+ opc = INDEX_op_qemu_st_a64_i128;
295
+ }
296
+ gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
297
298
if (need_bswap) {
299
tcg_temp_free_i64(lo);
300
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
301
302
canonicalize_memop_i128_as_i64(mop, memop);
303
304
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
305
+ opc = INDEX_op_qemu_st_a32_i64;
306
+ } else {
307
+ opc = INDEX_op_qemu_st_a64_i64;
308
+ }
309
+
310
if ((memop & MO_BSWAP) == MO_LE) {
311
x = TCGV128_LOW(val);
312
y = TCGV128_HIGH(val);
313
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
314
tcg_gen_bswap64_i64(b, x);
315
x = b;
316
}
317
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr,
318
- make_memop_idx(mop[0], idx));
319
+
320
+ gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));
321
322
if (tcg_ctx->addr_type == TCG_TYPE_I32) {
323
TCGv_i32 t = tcg_temp_ebb_new_i32();
324
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
325
326
if (b) {
327
tcg_gen_bswap64_i64(b, y);
328
- y = b;
329
- }
330
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8,
331
- make_memop_idx(mop[1], idx));
332
-
333
- if (b) {
334
+ gen_ldst_i64(opc, b, addr_p8, make_memop_idx(mop[1], idx));
335
tcg_temp_free_i64(b);
336
+ } else {
337
+ gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
338
}
339
tcg_temp_free_internal(addr_p8);
340
} else {
341
diff --git a/tcg/tcg.c b/tcg/tcg.c
342
index XXXXXXX..XXXXXXX 100644
343
--- a/tcg/tcg.c
344
+++ b/tcg/tcg.c
345
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
346
case INDEX_op_exit_tb:
347
case INDEX_op_goto_tb:
348
case INDEX_op_goto_ptr:
349
- case INDEX_op_qemu_ld_i32:
350
- case INDEX_op_qemu_st_i32:
351
- case INDEX_op_qemu_ld_i64:
352
- case INDEX_op_qemu_st_i64:
353
+ case INDEX_op_qemu_ld_a32_i32:
354
+ case INDEX_op_qemu_ld_a64_i32:
355
+ case INDEX_op_qemu_st_a32_i32:
356
+ case INDEX_op_qemu_st_a64_i32:
357
+ case INDEX_op_qemu_ld_a32_i64:
358
+ case INDEX_op_qemu_ld_a64_i64:
359
+ case INDEX_op_qemu_st_a32_i64:
360
+ case INDEX_op_qemu_st_a64_i64:
361
return true;
362
363
- case INDEX_op_qemu_st8_i32:
364
+ case INDEX_op_qemu_st8_a32_i32:
365
+ case INDEX_op_qemu_st8_a64_i32:
366
return TCG_TARGET_HAS_qemu_st8_i32;
367
368
- case INDEX_op_qemu_ld_i128:
369
- case INDEX_op_qemu_st_i128:
370
+ case INDEX_op_qemu_ld_a32_i128:
371
+ case INDEX_op_qemu_ld_a64_i128:
372
+ case INDEX_op_qemu_st_a32_i128:
373
+ case INDEX_op_qemu_st_a64_i128:
374
return TCG_TARGET_HAS_qemu_ldst_i128;
375
376
case INDEX_op_mov_i32:
377
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
378
}
379
i = 1;
380
break;
381
- case INDEX_op_qemu_ld_i32:
382
- case INDEX_op_qemu_st_i32:
383
- case INDEX_op_qemu_st8_i32:
384
- case INDEX_op_qemu_ld_i64:
385
- case INDEX_op_qemu_st_i64:
386
- case INDEX_op_qemu_ld_i128:
387
- case INDEX_op_qemu_st_i128:
388
+ case INDEX_op_qemu_ld_a32_i32:
389
+ case INDEX_op_qemu_ld_a64_i32:
390
+ case INDEX_op_qemu_st_a32_i32:
391
+ case INDEX_op_qemu_st_a64_i32:
392
+ case INDEX_op_qemu_st8_a32_i32:
393
+ case INDEX_op_qemu_st8_a64_i32:
394
+ case INDEX_op_qemu_ld_a32_i64:
395
+ case INDEX_op_qemu_ld_a64_i64:
396
+ case INDEX_op_qemu_st_a32_i64:
397
+ case INDEX_op_qemu_st_a64_i64:
398
+ case INDEX_op_qemu_ld_a32_i128:
399
+ case INDEX_op_qemu_ld_a64_i128:
400
+ case INDEX_op_qemu_st_a32_i128:
401
+ case INDEX_op_qemu_st_a64_i128:
402
{
403
const char *s_al, *s_op, *s_at;
404
MemOpIdx oi = op->args[k++];
405
diff --git a/tcg/tci.c b/tcg/tci.c
406
index XXXXXXX..XXXXXXX 100644
407
--- a/tcg/tci.c
408
+++ b/tcg/tci.c
409
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
410
tb_ptr = ptr;
411
break;
412
413
- case INDEX_op_qemu_ld_i32:
414
+ case INDEX_op_qemu_ld_a32_i32:
415
+ case INDEX_op_qemu_ld_a64_i32:
416
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
417
tci_args_rrm(insn, &r0, &r1, &oi);
418
taddr = regs[r1];
419
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
420
regs[r0] = tmp32;
421
break;
422
423
- case INDEX_op_qemu_ld_i64:
424
+ case INDEX_op_qemu_ld_a32_i64:
425
+ case INDEX_op_qemu_ld_a64_i64:
426
if (TCG_TARGET_REG_BITS == 64) {
427
tci_args_rrm(insn, &r0, &r1, &oi);
428
taddr = regs[r1];
429
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
430
}
431
break;
432
433
- case INDEX_op_qemu_st_i32:
434
+ case INDEX_op_qemu_st_a32_i32:
435
+ case INDEX_op_qemu_st_a64_i32:
436
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
437
tci_args_rrm(insn, &r0, &r1, &oi);
438
taddr = regs[r1];
439
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
440
tci_qemu_st(env, taddr, tmp32, oi, tb_ptr);
441
break;
442
443
- case INDEX_op_qemu_st_i64:
444
+ case INDEX_op_qemu_st_a32_i64:
445
+ case INDEX_op_qemu_st_a64_i64:
446
if (TCG_TARGET_REG_BITS == 64) {
447
tci_args_rrm(insn, &r0, &r1, &oi);
448
taddr = regs[r1];
449
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
450
str_r(r3), str_r(r4), str_r(r5));
451
break;
452
453
- case INDEX_op_qemu_ld_i64:
454
- case INDEX_op_qemu_st_i64:
455
- len = DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
456
+ case INDEX_op_qemu_ld_a32_i32:
457
+ case INDEX_op_qemu_st_a32_i32:
458
+ len = 1 + 1;
459
+ goto do_qemu_ldst;
460
+ case INDEX_op_qemu_ld_a32_i64:
461
+ case INDEX_op_qemu_st_a32_i64:
462
+ case INDEX_op_qemu_ld_a64_i32:
463
+ case INDEX_op_qemu_st_a64_i32:
464
+ len = 1 + DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
465
+ goto do_qemu_ldst;
466
+ case INDEX_op_qemu_ld_a64_i64:
467
+ case INDEX_op_qemu_st_a64_i64:
468
+ len = 2 * DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
469
goto do_qemu_ldst;
470
- case INDEX_op_qemu_ld_i32:
471
- case INDEX_op_qemu_st_i32:
472
- len = 1;
473
do_qemu_ldst:
474
- len += DIV_ROUND_UP(TARGET_LONG_BITS, TCG_TARGET_REG_BITS);
475
switch (len) {
476
case 2:
477
tci_args_rrm(insn, &r0, &r1, &oi);
478
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
479
index XXXXXXX..XXXXXXX 100644
480
--- a/tcg/aarch64/tcg-target.c.inc
481
+++ b/tcg/aarch64/tcg-target.c.inc
482
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
483
tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
484
break;
485
486
- case INDEX_op_qemu_ld_i32:
487
- case INDEX_op_qemu_ld_i64:
488
+ case INDEX_op_qemu_ld_a32_i32:
489
+ case INDEX_op_qemu_ld_a64_i32:
490
+ case INDEX_op_qemu_ld_a32_i64:
491
+ case INDEX_op_qemu_ld_a64_i64:
492
tcg_out_qemu_ld(s, a0, a1, a2, ext);
493
break;
494
- case INDEX_op_qemu_st_i32:
495
- case INDEX_op_qemu_st_i64:
496
+ case INDEX_op_qemu_st_a32_i32:
497
+ case INDEX_op_qemu_st_a64_i32:
498
+ case INDEX_op_qemu_st_a32_i64:
499
+ case INDEX_op_qemu_st_a64_i64:
500
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
501
break;
502
- case INDEX_op_qemu_ld_i128:
503
+ case INDEX_op_qemu_ld_a32_i128:
504
+ case INDEX_op_qemu_ld_a64_i128:
505
tcg_out_qemu_ld128(s, a0, a1, a2, args[3]);
506
break;
507
- case INDEX_op_qemu_st_i128:
508
+ case INDEX_op_qemu_st_a32_i128:
509
+ case INDEX_op_qemu_st_a64_i128:
510
tcg_out_qemu_st128(s, REG0(0), REG0(1), a2, args[3]);
511
break;
512
513
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
514
case INDEX_op_movcond_i64:
515
return C_O1_I4(r, r, rA, rZ, rZ);
516
517
- case INDEX_op_qemu_ld_i32:
518
- case INDEX_op_qemu_ld_i64:
519
+ case INDEX_op_qemu_ld_a32_i32:
520
+ case INDEX_op_qemu_ld_a64_i32:
521
+ case INDEX_op_qemu_ld_a32_i64:
522
+ case INDEX_op_qemu_ld_a64_i64:
523
return C_O1_I1(r, l);
524
- case INDEX_op_qemu_ld_i128:
525
+ case INDEX_op_qemu_ld_a32_i128:
526
+ case INDEX_op_qemu_ld_a64_i128:
527
return C_O2_I1(r, r, l);
528
- case INDEX_op_qemu_st_i32:
529
- case INDEX_op_qemu_st_i64:
530
+ case INDEX_op_qemu_st_a32_i32:
531
+ case INDEX_op_qemu_st_a64_i32:
532
+ case INDEX_op_qemu_st_a32_i64:
533
+ case INDEX_op_qemu_st_a64_i64:
534
return C_O0_I2(lZ, l);
535
- case INDEX_op_qemu_st_i128:
536
+ case INDEX_op_qemu_st_a32_i128:
537
+ case INDEX_op_qemu_st_a64_i128:
538
return C_O0_I3(lZ, lZ, l);
539
540
case INDEX_op_deposit_i32:
541
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
542
index XXXXXXX..XXXXXXX 100644
543
--- a/tcg/arm/tcg-target.c.inc
544
+++ b/tcg/arm/tcg-target.c.inc
545
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
546
ARITH_MOV, args[0], 0, 0);
547
break;
548
549
- case INDEX_op_qemu_ld_i32:
550
- if (TARGET_LONG_BITS == 32) {
551
- tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
552
- args[2], TCG_TYPE_I32);
553
- } else {
554
- tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
555
- args[3], TCG_TYPE_I32);
556
- }
557
+ case INDEX_op_qemu_ld_a32_i32:
558
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
559
break;
560
- case INDEX_op_qemu_ld_i64:
561
- if (TARGET_LONG_BITS == 32) {
562
- tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
563
- args[3], TCG_TYPE_I64);
564
- } else {
565
- tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
566
- args[4], TCG_TYPE_I64);
567
- }
568
+ case INDEX_op_qemu_ld_a64_i32:
569
+ tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
570
+ args[3], TCG_TYPE_I32);
571
break;
572
- case INDEX_op_qemu_st_i32:
573
- if (TARGET_LONG_BITS == 32) {
574
- tcg_out_qemu_st(s, args[0], -1, args[1], -1,
575
- args[2], TCG_TYPE_I32);
576
- } else {
577
- tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
578
- args[3], TCG_TYPE_I32);
579
- }
580
+ case INDEX_op_qemu_ld_a32_i64:
581
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
582
+ args[3], TCG_TYPE_I64);
583
break;
584
- case INDEX_op_qemu_st_i64:
585
- if (TARGET_LONG_BITS == 32) {
586
- tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
587
- args[3], TCG_TYPE_I64);
588
- } else {
589
- tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
590
- args[4], TCG_TYPE_I64);
591
- }
592
+ case INDEX_op_qemu_ld_a64_i64:
593
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
594
+ args[4], TCG_TYPE_I64);
595
+ break;
596
+
597
+ case INDEX_op_qemu_st_a32_i32:
598
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
599
+ break;
600
+ case INDEX_op_qemu_st_a64_i32:
601
+ tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
602
+ args[3], TCG_TYPE_I32);
603
+ break;
604
+ case INDEX_op_qemu_st_a32_i64:
605
+ tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
606
+ args[3], TCG_TYPE_I64);
607
+ break;
608
+ case INDEX_op_qemu_st_a64_i64:
609
+ tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
610
+ args[4], TCG_TYPE_I64);
611
break;
612
613
case INDEX_op_bswap16_i32:
614
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
615
case INDEX_op_setcond2_i32:
616
return C_O1_I4(r, r, r, rI, rI);
617
618
- case INDEX_op_qemu_ld_i32:
619
- return TARGET_LONG_BITS == 32 ? C_O1_I1(r, q) : C_O1_I2(r, q, q);
620
- case INDEX_op_qemu_ld_i64:
621
- return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, q) : C_O2_I2(e, p, q, q);
622
- case INDEX_op_qemu_st_i32:
623
- return TARGET_LONG_BITS == 32 ? C_O0_I2(q, q) : C_O0_I3(q, q, q);
624
- case INDEX_op_qemu_st_i64:
625
- return TARGET_LONG_BITS == 32 ? C_O0_I3(Q, p, q) : C_O0_I4(Q, p, q, q);
626
+ case INDEX_op_qemu_ld_a32_i32:
627
+ return C_O1_I1(r, q);
628
+ case INDEX_op_qemu_ld_a64_i32:
629
+ return C_O1_I2(r, q, q);
630
+ case INDEX_op_qemu_ld_a32_i64:
631
+ return C_O2_I1(e, p, q);
632
+ case INDEX_op_qemu_ld_a64_i64:
633
+ return C_O2_I2(e, p, q, q);
634
+ case INDEX_op_qemu_st_a32_i32:
635
+ return C_O0_I2(q, q);
636
+ case INDEX_op_qemu_st_a64_i32:
637
+ return C_O0_I3(q, q, q);
638
+ case INDEX_op_qemu_st_a32_i64:
639
+ return C_O0_I3(Q, p, q);
640
+ case INDEX_op_qemu_st_a64_i64:
641
+ return C_O0_I4(Q, p, q, q);
642
643
case INDEX_op_st_vec:
644
return C_O0_I2(w, r);
645
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
646
index XXXXXXX..XXXXXXX 100644
647
--- a/tcg/i386/tcg-target.c.inc
648
+++ b/tcg/i386/tcg-target.c.inc
649
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
650
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
651
break;
652
653
- case INDEX_op_qemu_ld_i32:
654
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
655
- tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
656
- } else {
657
+ case INDEX_op_qemu_ld_a64_i32:
658
+ if (TCG_TARGET_REG_BITS == 32) {
659
tcg_out_qemu_ld(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
660
+ break;
661
}
662
+ /* fall through */
663
+ case INDEX_op_qemu_ld_a32_i32:
664
+ tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
665
break;
666
- case INDEX_op_qemu_ld_i64:
667
+ case INDEX_op_qemu_ld_a32_i64:
668
if (TCG_TARGET_REG_BITS == 64) {
669
tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
670
- } else if (TARGET_LONG_BITS == 32) {
671
+ } else {
672
tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
673
+ }
674
+ break;
675
+ case INDEX_op_qemu_ld_a64_i64:
676
+ if (TCG_TARGET_REG_BITS == 64) {
677
+ tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
678
} else {
679
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
680
}
681
break;
682
- case INDEX_op_qemu_ld_i128:
683
+ case INDEX_op_qemu_ld_a32_i128:
684
+ case INDEX_op_qemu_ld_a64_i128:
685
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
686
tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
687
break;
688
- case INDEX_op_qemu_st_i32:
689
- case INDEX_op_qemu_st8_i32:
690
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
691
- tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
692
- } else {
693
+
694
+ case INDEX_op_qemu_st_a64_i32:
695
+ case INDEX_op_qemu_st8_a64_i32:
696
+ if (TCG_TARGET_REG_BITS == 32) {
697
tcg_out_qemu_st(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
698
+ break;
699
}
700
+ /* fall through */
701
+ case INDEX_op_qemu_st_a32_i32:
702
+ case INDEX_op_qemu_st8_a32_i32:
703
+ tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
704
break;
705
- case INDEX_op_qemu_st_i64:
706
+ case INDEX_op_qemu_st_a32_i64:
707
if (TCG_TARGET_REG_BITS == 64) {
708
tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
709
- } else if (TARGET_LONG_BITS == 32) {
710
+ } else {
711
tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
712
+ }
713
+ break;
714
+ case INDEX_op_qemu_st_a64_i64:
715
+ if (TCG_TARGET_REG_BITS == 64) {
716
+ tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
717
} else {
718
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
719
}
720
break;
721
- case INDEX_op_qemu_st_i128:
722
+ case INDEX_op_qemu_st_a32_i128:
723
+ case INDEX_op_qemu_st_a64_i128:
724
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
725
tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
726
break;
727
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
728
case INDEX_op_clz_i64:
729
return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r);
730
731
- case INDEX_op_qemu_ld_i32:
732
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
733
- ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
734
+ case INDEX_op_qemu_ld_a32_i32:
735
+ return C_O1_I1(r, L);
736
+ case INDEX_op_qemu_ld_a64_i32:
737
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O1_I2(r, L, L);
738
739
- case INDEX_op_qemu_st_i32:
740
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
741
- ? C_O0_I2(L, L) : C_O0_I3(L, L, L));
742
- case INDEX_op_qemu_st8_i32:
743
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
744
- ? C_O0_I2(s, L) : C_O0_I3(s, L, L));
745
+ case INDEX_op_qemu_st_a32_i32:
746
+ return C_O0_I2(L, L);
747
+ case INDEX_op_qemu_st_a64_i32:
748
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
749
+ case INDEX_op_qemu_st8_a32_i32:
750
+ return C_O0_I2(s, L);
751
+ case INDEX_op_qemu_st8_a64_i32:
752
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(s, L) : C_O0_I3(s, L, L);
753
754
- case INDEX_op_qemu_ld_i64:
755
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
756
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, L)
757
- : C_O2_I2(r, r, L, L));
758
+ case INDEX_op_qemu_ld_a32_i64:
759
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L);
760
+ case INDEX_op_qemu_ld_a64_i64:
761
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I2(r, r, L, L);
762
763
- case INDEX_op_qemu_st_i64:
764
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L)
765
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(L, L, L)
766
- : C_O0_I4(L, L, L, L));
767
+ case INDEX_op_qemu_st_a32_i64:
768
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
769
+ case INDEX_op_qemu_st_a64_i64:
770
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L);
771
772
- case INDEX_op_qemu_ld_i128:
773
+ case INDEX_op_qemu_ld_a32_i128:
774
+ case INDEX_op_qemu_ld_a64_i128:
775
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
776
return C_O2_I1(r, r, L);
777
- case INDEX_op_qemu_st_i128:
778
+ case INDEX_op_qemu_st_a32_i128:
779
+ case INDEX_op_qemu_st_a64_i128:
780
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
781
return C_O0_I3(L, L, L);
782
783
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
784
index XXXXXXX..XXXXXXX 100644
785
--- a/tcg/loongarch64/tcg-target.c.inc
786
+++ b/tcg/loongarch64/tcg-target.c.inc
787
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
788
tcg_out_ldst(s, OPC_ST_D, a0, a1, a2);
789
break;
790
791
- case INDEX_op_qemu_ld_i32:
792
+ case INDEX_op_qemu_ld_a32_i32:
793
+ case INDEX_op_qemu_ld_a64_i32:
794
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
795
break;
796
- case INDEX_op_qemu_ld_i64:
797
+ case INDEX_op_qemu_ld_a32_i64:
798
+ case INDEX_op_qemu_ld_a64_i64:
799
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
800
break;
801
- case INDEX_op_qemu_st_i32:
802
+ case INDEX_op_qemu_st_a32_i32:
803
+ case INDEX_op_qemu_st_a64_i32:
804
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
805
break;
806
- case INDEX_op_qemu_st_i64:
807
+ case INDEX_op_qemu_st_a32_i64:
808
+ case INDEX_op_qemu_st_a64_i64:
809
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
810
break;
811
812
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
813
case INDEX_op_st32_i64:
814
case INDEX_op_st_i32:
815
case INDEX_op_st_i64:
816
- case INDEX_op_qemu_st_i32:
817
- case INDEX_op_qemu_st_i64:
818
+ case INDEX_op_qemu_st_a32_i32:
819
+ case INDEX_op_qemu_st_a64_i32:
820
+ case INDEX_op_qemu_st_a32_i64:
821
+ case INDEX_op_qemu_st_a64_i64:
822
return C_O0_I2(rZ, r);
823
824
case INDEX_op_brcond_i32:
825
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
826
case INDEX_op_ld32u_i64:
827
case INDEX_op_ld_i32:
828
case INDEX_op_ld_i64:
829
- case INDEX_op_qemu_ld_i32:
830
- case INDEX_op_qemu_ld_i64:
831
+ case INDEX_op_qemu_ld_a32_i32:
832
+ case INDEX_op_qemu_ld_a64_i32:
833
+ case INDEX_op_qemu_ld_a32_i64:
834
+ case INDEX_op_qemu_ld_a64_i64:
835
return C_O1_I1(r, r);
836
837
case INDEX_op_andc_i32:
838
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
839
index XXXXXXX..XXXXXXX 100644
840
--- a/tcg/mips/tcg-target.c.inc
841
+++ b/tcg/mips/tcg-target.c.inc
842
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
843
tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]);
844
break;
845
846
- case INDEX_op_qemu_ld_i32:
847
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
848
- tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
849
- } else {
850
+ case INDEX_op_qemu_ld_a64_i32:
851
+ if (TCG_TARGET_REG_BITS == 32) {
852
tcg_out_qemu_ld(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
853
+ break;
854
}
855
+ /* fall through */
856
+ case INDEX_op_qemu_ld_a32_i32:
857
+ tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
858
break;
859
- case INDEX_op_qemu_ld_i64:
860
+ case INDEX_op_qemu_ld_a32_i64:
861
if (TCG_TARGET_REG_BITS == 64) {
862
tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
863
- } else if (TARGET_LONG_BITS == 32) {
864
+ } else {
865
tcg_out_qemu_ld(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
866
+ }
867
+ break;
868
+ case INDEX_op_qemu_ld_a64_i64:
869
+ if (TCG_TARGET_REG_BITS == 64) {
870
+ tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
871
} else {
872
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
873
}
874
break;
875
- case INDEX_op_qemu_st_i32:
876
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
877
- tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
878
- } else {
879
+
880
+ case INDEX_op_qemu_st_a64_i32:
881
+ if (TCG_TARGET_REG_BITS == 32) {
882
tcg_out_qemu_st(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
883
+ break;
884
}
885
+ /* fall through */
886
+ case INDEX_op_qemu_st_a32_i32:
887
+ tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
888
break;
889
- case INDEX_op_qemu_st_i64:
890
+ case INDEX_op_qemu_st_a32_i64:
891
if (TCG_TARGET_REG_BITS == 64) {
892
tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
893
- } else if (TARGET_LONG_BITS == 32) {
894
+ } else {
895
tcg_out_qemu_st(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
896
+ }
897
+ break;
898
+ case INDEX_op_qemu_st_a64_i64:
899
+ if (TCG_TARGET_REG_BITS == 64) {
900
+ tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
901
} else {
902
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
903
}
904
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
905
case INDEX_op_brcond2_i32:
906
return C_O0_I4(rZ, rZ, rZ, rZ);
907
908
- case INDEX_op_qemu_ld_i32:
909
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
910
- ? C_O1_I1(r, r) : C_O1_I2(r, r, r));
911
- case INDEX_op_qemu_st_i32:
912
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
913
- ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r));
914
- case INDEX_op_qemu_ld_i64:
915
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
916
- : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
917
- : C_O2_I2(r, r, r, r));
918
- case INDEX_op_qemu_st_i64:
919
+ case INDEX_op_qemu_ld_a32_i32:
920
+ return C_O1_I1(r, r);
921
+ case INDEX_op_qemu_ld_a64_i32:
922
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
923
+ case INDEX_op_qemu_st_a32_i32:
924
+ return C_O0_I2(rZ, r);
925
+ case INDEX_op_qemu_st_a64_i32:
926
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r);
927
+ case INDEX_op_qemu_ld_a32_i64:
928
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
929
+ case INDEX_op_qemu_ld_a64_i64:
930
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
931
+ case INDEX_op_qemu_st_a32_i64:
932
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) : C_O0_I3(rZ, rZ, r);
933
+ case INDEX_op_qemu_st_a64_i64:
934
return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r)
935
- : TARGET_LONG_BITS == 32 ? C_O0_I3(rZ, rZ, r)
936
: C_O0_I4(rZ, rZ, r, r));
937
938
default:
939
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
940
index XXXXXXX..XXXXXXX 100644
941
--- a/tcg/ppc/tcg-target.c.inc
942
+++ b/tcg/ppc/tcg-target.c.inc
943
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
944
tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
945
break;
946
947
- case INDEX_op_qemu_ld_i32:
948
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
949
- tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
950
- args[2], TCG_TYPE_I32);
951
- } else {
952
+ case INDEX_op_qemu_ld_a64_i32:
953
+ if (TCG_TARGET_REG_BITS == 32) {
954
tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
955
args[3], TCG_TYPE_I32);
956
+ break;
957
}
958
+ /* fall through */
959
+ case INDEX_op_qemu_ld_a32_i32:
960
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
961
break;
962
- case INDEX_op_qemu_ld_i64:
963
+ case INDEX_op_qemu_ld_a32_i64:
964
if (TCG_TARGET_REG_BITS == 64) {
965
tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
966
args[2], TCG_TYPE_I64);
967
- } else if (TARGET_LONG_BITS == 32) {
968
+ } else {
969
tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
970
args[3], TCG_TYPE_I64);
971
+ }
972
+ break;
973
+ case INDEX_op_qemu_ld_a64_i64:
974
+ if (TCG_TARGET_REG_BITS == 64) {
975
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
976
+ args[2], TCG_TYPE_I64);
977
} else {
978
tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
979
args[4], TCG_TYPE_I64);
980
}
981
break;
982
- case INDEX_op_qemu_ld_i128:
983
+ case INDEX_op_qemu_ld_a32_i128:
984
+ case INDEX_op_qemu_ld_a64_i128:
985
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
986
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
987
break;
988
989
- case INDEX_op_qemu_st_i32:
990
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
991
- tcg_out_qemu_st(s, args[0], -1, args[1], -1,
992
- args[2], TCG_TYPE_I32);
993
- } else {
994
+ case INDEX_op_qemu_st_a64_i32:
995
+ if (TCG_TARGET_REG_BITS == 32) {
996
tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
997
args[3], TCG_TYPE_I32);
998
+ break;
999
}
1000
+ /* fall through */
1001
+ case INDEX_op_qemu_st_a32_i32:
1002
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
1003
break;
1004
- case INDEX_op_qemu_st_i64:
1005
+ case INDEX_op_qemu_st_a32_i64:
1006
if (TCG_TARGET_REG_BITS == 64) {
1007
tcg_out_qemu_st(s, args[0], -1, args[1], -1,
1008
args[2], TCG_TYPE_I64);
1009
- } else if (TARGET_LONG_BITS == 32) {
1010
+ } else {
1011
tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
1012
args[3], TCG_TYPE_I64);
1013
+ }
1014
+ break;
1015
+ case INDEX_op_qemu_st_a64_i64:
1016
+ if (TCG_TARGET_REG_BITS == 64) {
1017
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1,
1018
+ args[2], TCG_TYPE_I64);
1019
} else {
1020
tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
1021
args[4], TCG_TYPE_I64);
1022
}
1023
break;
1024
- case INDEX_op_qemu_st_i128:
1025
+ case INDEX_op_qemu_st_a32_i128:
1026
+ case INDEX_op_qemu_st_a64_i128:
1027
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1028
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
1029
break;
1030
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1031
case INDEX_op_sub2_i32:
1032
return C_O2_I4(r, r, rI, rZM, r, r);
1033
1034
- case INDEX_op_qemu_ld_i32:
1035
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
1036
- ? C_O1_I1(r, r)
1037
- : C_O1_I2(r, r, r));
1038
-
1039
- case INDEX_op_qemu_st_i32:
1040
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
1041
- ? C_O0_I2(r, r)
1042
- : C_O0_I3(r, r, r));
1043
-
1044
- case INDEX_op_qemu_ld_i64:
1045
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
1046
- : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
1047
- : C_O2_I2(r, r, r, r));
1048
-
1049
- case INDEX_op_qemu_st_i64:
1050
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
1051
- : TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
1052
- : C_O0_I4(r, r, r, r));
1053
-
1054
- case INDEX_op_qemu_ld_i128:
1055
+ case INDEX_op_qemu_ld_a32_i32:
1056
+ return C_O1_I1(r, r);
1057
+ case INDEX_op_qemu_ld_a64_i32:
1058
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
1059
+ case INDEX_op_qemu_ld_a32_i64:
1060
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
1061
+ case INDEX_op_qemu_ld_a64_i64:
1062
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
1063
+ case INDEX_op_qemu_ld_a32_i128:
1064
+ case INDEX_op_qemu_ld_a64_i128:
1065
return C_O2_I1(o, m, r);
1066
- case INDEX_op_qemu_st_i128:
1067
+
1068
+ case INDEX_op_qemu_st_a32_i32:
1069
+ return C_O0_I2(r, r);
1070
+ case INDEX_op_qemu_st_a64_i32:
1071
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1072
+ case INDEX_op_qemu_st_a32_i64:
1073
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1074
+ case INDEX_op_qemu_st_a64_i64:
1075
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
1076
+ case INDEX_op_qemu_st_a32_i128:
1077
+ case INDEX_op_qemu_st_a64_i128:
1078
return C_O0_I3(o, m, r);
1079
1080
case INDEX_op_add_vec:
1081
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
1082
index XXXXXXX..XXXXXXX 100644
1083
--- a/tcg/riscv/tcg-target.c.inc
1084
+++ b/tcg/riscv/tcg-target.c.inc
1085
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1086
tcg_out_setcond(s, args[3], a0, a1, a2);
1087
break;
1088
1089
- case INDEX_op_qemu_ld_i32:
1090
+ case INDEX_op_qemu_ld_a32_i32:
1091
+ case INDEX_op_qemu_ld_a64_i32:
1092
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
1093
break;
1094
- case INDEX_op_qemu_ld_i64:
1095
+ case INDEX_op_qemu_ld_a32_i64:
1096
+ case INDEX_op_qemu_ld_a64_i64:
1097
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
1098
break;
1099
- case INDEX_op_qemu_st_i32:
1100
+ case INDEX_op_qemu_st_a32_i32:
1101
+ case INDEX_op_qemu_st_a64_i32:
1102
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
1103
break;
1104
- case INDEX_op_qemu_st_i64:
1105
+ case INDEX_op_qemu_st_a32_i64:
1106
+ case INDEX_op_qemu_st_a64_i64:
1107
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
1108
break;
1109
1110
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1111
case INDEX_op_sub2_i64:
1112
return C_O2_I4(r, r, rZ, rZ, rM, rM);
1113
1114
- case INDEX_op_qemu_ld_i32:
1115
- case INDEX_op_qemu_ld_i64:
1116
+ case INDEX_op_qemu_ld_a32_i32:
1117
+ case INDEX_op_qemu_ld_a64_i32:
1118
+ case INDEX_op_qemu_ld_a32_i64:
1119
+ case INDEX_op_qemu_ld_a64_i64:
1120
return C_O1_I1(r, r);
1121
- case INDEX_op_qemu_st_i32:
1122
- case INDEX_op_qemu_st_i64:
1123
+ case INDEX_op_qemu_st_a32_i32:
1124
+ case INDEX_op_qemu_st_a64_i32:
1125
+ case INDEX_op_qemu_st_a32_i64:
1126
+ case INDEX_op_qemu_st_a64_i64:
1127
return C_O0_I2(rZ, r);
1128
1129
default:
1130
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
1131
index XXXXXXX..XXXXXXX 100644
1132
--- a/tcg/s390x/tcg-target.c.inc
1133
+++ b/tcg/s390x/tcg-target.c.inc
1134
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1135
args[2], const_args[2], args[3], const_args[3], args[4]);
1136
break;
1137
1138
- case INDEX_op_qemu_ld_i32:
1139
+ case INDEX_op_qemu_ld_a32_i32:
1140
+ case INDEX_op_qemu_ld_a64_i32:
1141
tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32);
1142
break;
1143
- case INDEX_op_qemu_ld_i64:
1144
+ case INDEX_op_qemu_ld_a32_i64:
1145
+ case INDEX_op_qemu_ld_a64_i64:
1146
tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64);
1147
break;
1148
- case INDEX_op_qemu_st_i32:
1149
+ case INDEX_op_qemu_st_a32_i32:
1150
+ case INDEX_op_qemu_st_a64_i32:
1151
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32);
1152
break;
1153
- case INDEX_op_qemu_st_i64:
1154
+ case INDEX_op_qemu_st_a32_i64:
1155
+ case INDEX_op_qemu_st_a64_i64:
1156
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
1157
break;
1158
- case INDEX_op_qemu_ld_i128:
1159
+ case INDEX_op_qemu_ld_a32_i128:
1160
+ case INDEX_op_qemu_ld_a64_i128:
1161
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
1162
break;
1163
- case INDEX_op_qemu_st_i128:
1164
+ case INDEX_op_qemu_st_a32_i128:
1165
+ case INDEX_op_qemu_st_a64_i128:
1166
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
1167
break;
1168
1169
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1170
case INDEX_op_ctpop_i64:
1171
return C_O1_I1(r, r);
1172
1173
- case INDEX_op_qemu_ld_i32:
1174
- case INDEX_op_qemu_ld_i64:
1175
+ case INDEX_op_qemu_ld_a32_i32:
1176
+ case INDEX_op_qemu_ld_a64_i32:
1177
+ case INDEX_op_qemu_ld_a32_i64:
1178
+ case INDEX_op_qemu_ld_a64_i64:
1179
return C_O1_I1(r, r);
1180
- case INDEX_op_qemu_st_i64:
1181
- case INDEX_op_qemu_st_i32:
1182
+ case INDEX_op_qemu_st_a32_i64:
1183
+ case INDEX_op_qemu_st_a64_i64:
1184
+ case INDEX_op_qemu_st_a32_i32:
1185
+ case INDEX_op_qemu_st_a64_i32:
1186
return C_O0_I2(r, r);
1187
- case INDEX_op_qemu_ld_i128:
1188
+ case INDEX_op_qemu_ld_a32_i128:
1189
+ case INDEX_op_qemu_ld_a64_i128:
1190
return C_O2_I1(o, m, r);
1191
- case INDEX_op_qemu_st_i128:
1192
+ case INDEX_op_qemu_st_a32_i128:
1193
+ case INDEX_op_qemu_st_a64_i128:
1194
return C_O0_I3(o, m, r);
1195
1196
case INDEX_op_deposit_i32:
1197
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
1198
index XXXXXXX..XXXXXXX 100644
1199
--- a/tcg/sparc64/tcg-target.c.inc
1200
+++ b/tcg/sparc64/tcg-target.c.inc
1201
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1202
tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
1203
break;
1204
1205
- case INDEX_op_qemu_ld_i32:
1206
+ case INDEX_op_qemu_ld_a32_i32:
1207
+ case INDEX_op_qemu_ld_a64_i32:
1208
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
1209
break;
1210
- case INDEX_op_qemu_ld_i64:
1211
+ case INDEX_op_qemu_ld_a32_i64:
1212
+ case INDEX_op_qemu_ld_a64_i64:
1213
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
1214
break;
1215
- case INDEX_op_qemu_st_i32:
1216
+ case INDEX_op_qemu_st_a32_i32:
1217
+ case INDEX_op_qemu_st_a64_i32:
1218
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
1219
break;
1220
- case INDEX_op_qemu_st_i64:
1221
+ case INDEX_op_qemu_st_a32_i64:
1222
+ case INDEX_op_qemu_st_a64_i64:
1223
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
1224
break;
1225
1226
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1227
case INDEX_op_extu_i32_i64:
1228
case INDEX_op_extrl_i64_i32:
1229
case INDEX_op_extrh_i64_i32:
1230
- case INDEX_op_qemu_ld_i32:
1231
- case INDEX_op_qemu_ld_i64:
1232
+ case INDEX_op_qemu_ld_a32_i32:
1233
+ case INDEX_op_qemu_ld_a64_i32:
1234
+ case INDEX_op_qemu_ld_a32_i64:
1235
+ case INDEX_op_qemu_ld_a64_i64:
1236
return C_O1_I1(r, r);
1237
1238
case INDEX_op_st8_i32:
1239
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1240
case INDEX_op_st_i32:
1241
case INDEX_op_st32_i64:
1242
case INDEX_op_st_i64:
1243
- case INDEX_op_qemu_st_i32:
1244
- case INDEX_op_qemu_st_i64:
1245
+ case INDEX_op_qemu_st_a32_i32:
1246
+ case INDEX_op_qemu_st_a64_i32:
1247
+ case INDEX_op_qemu_st_a32_i64:
1248
+ case INDEX_op_qemu_st_a64_i64:
1249
return C_O0_I2(rZ, r);
1250
1251
case INDEX_op_add_i32:
1252
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
1253
index XXXXXXX..XXXXXXX 100644
1254
--- a/tcg/tci/tcg-target.c.inc
1255
+++ b/tcg/tci/tcg-target.c.inc
1256
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1257
case INDEX_op_setcond2_i32:
1258
return C_O1_I4(r, r, r, r, r);
1259
1260
- case INDEX_op_qemu_ld_i32:
1261
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
1262
- ? C_O1_I1(r, r)
1263
- : C_O1_I2(r, r, r));
1264
- case INDEX_op_qemu_ld_i64:
1265
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
1266
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, r)
1267
- : C_O2_I2(r, r, r, r));
1268
- case INDEX_op_qemu_st_i32:
1269
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
1270
- ? C_O0_I2(r, r)
1271
- : C_O0_I3(r, r, r));
1272
- case INDEX_op_qemu_st_i64:
1273
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
1274
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(r, r, r)
1275
- : C_O0_I4(r, r, r, r));
1276
+ case INDEX_op_qemu_ld_a32_i32:
1277
+ return C_O1_I1(r, r);
1278
+ case INDEX_op_qemu_ld_a64_i32:
1279
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
1280
+ case INDEX_op_qemu_ld_a32_i64:
1281
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
1282
+ case INDEX_op_qemu_ld_a64_i64:
1283
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
1284
+ case INDEX_op_qemu_st_a32_i32:
1285
+ return C_O0_I2(r, r);
1286
+ case INDEX_op_qemu_st_a64_i32:
1287
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1288
+ case INDEX_op_qemu_st_a32_i64:
1289
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1290
+ case INDEX_op_qemu_st_a64_i64:
1291
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
1292
1293
default:
1294
g_assert_not_reached();
1295
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1296
tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]);
1297
break;
1298
1299
- case INDEX_op_qemu_ld_i32:
1300
- case INDEX_op_qemu_st_i32:
1301
+ case INDEX_op_qemu_ld_a32_i32:
1302
+ case INDEX_op_qemu_ld_a64_i32:
1303
+ case INDEX_op_qemu_st_a32_i32:
1304
+ case INDEX_op_qemu_st_a64_i32:
1305
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
1306
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
1307
} else {
1308
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1309
}
1310
break;
1311
1312
- case INDEX_op_qemu_ld_i64:
1313
- case INDEX_op_qemu_st_i64:
1314
+ case INDEX_op_qemu_ld_a32_i64:
1315
+ case INDEX_op_qemu_ld_a64_i64:
1316
+ case INDEX_op_qemu_st_a32_i64:
1317
+ case INDEX_op_qemu_st_a64_i64:
1318
if (TCG_TARGET_REG_BITS == 64) {
1319
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
1320
} else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
1321
--
1322
2.34.1
1323
1324
1
We do not need the entire CPUArchState to compute these values.
1
We now have the address size as part of the opcode, so
2
we no longer need to test TARGET_LONG_BITS. We can use
3
uint64_t in place of target_ulong, as passed into the load/store helpers.
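
As a hedged illustration of the new convention (mirroring the tci.c hunks below): the guest address is held in a uint64_t, and the *_a32_* opcodes simply zero-extend the host register value, so no TARGET_LONG_BITS test remains. The helper name here is hypothetical, not part of the patch:

    /* Hypothetical helper: form a 32-bit guest address from a host register. */
    static inline uint64_t tci_addr32(const tcg_target_ulong *regs, TCGReg r)
    {
        return (uint32_t)regs[r];   /* zero-extend, as the a32 opcodes require */
    }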
2
4
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
accel/tcg/cputlb.c | 15 ++++++++-------
8
tcg/tci.c | 61 +++++++++++++++++++++++++---------------
9
1 file changed, 8 insertions(+), 7 deletions(-)
9
tcg/tci/tcg-target.c.inc | 15 +++++-----
10
2 files changed, 46 insertions(+), 30 deletions(-)
10
11
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
diff --git a/tcg/tci.c b/tcg/tci.c
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
14
--- a/tcg/tci.c
14
+++ b/accel/tcg/cputlb.c
15
+++ b/tcg/tci.c
15
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
16
@@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
16
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
17
return result;
17
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
18
}
18
19
19
-static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
20
-static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
20
+static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
21
+static uint64_t tci_qemu_ld(CPUArchState *env, uint64_t taddr,
22
MemOpIdx oi, const void *tb_ptr)
21
{
23
{
22
- return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
24
MemOp mop = get_memop(oi);
23
+ return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
25
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
26
}
24
}
27
}
25
28
26
-static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
29
-static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
27
+static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
30
+static void tci_qemu_st(CPUArchState *env, uint64_t taddr, uint64_t val,
31
MemOpIdx oi, const void *tb_ptr)
28
{
32
{
29
- return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
33
MemOp mop = get_memop(oi);
30
+ return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
34
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
35
TCGReg r0, r1, r2, r3, r4, r5;
36
tcg_target_ulong t1;
37
TCGCond condition;
38
- target_ulong taddr;
39
uint8_t pos, len;
40
uint32_t tmp32;
41
- uint64_t tmp64;
42
+ uint64_t tmp64, taddr;
43
uint64_t T1, T2;
44
MemOpIdx oi;
45
int32_t ofs;
46
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
47
break;
48
49
case INDEX_op_qemu_ld_a32_i32:
50
+ tci_args_rrm(insn, &r0, &r1, &oi);
51
+ taddr = (uint32_t)regs[r1];
52
+ goto do_ld_i32;
53
case INDEX_op_qemu_ld_a64_i32:
54
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
55
+ if (TCG_TARGET_REG_BITS == 64) {
56
tci_args_rrm(insn, &r0, &r1, &oi);
57
taddr = regs[r1];
58
} else {
59
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
60
taddr = tci_uint64(regs[r2], regs[r1]);
61
}
62
- tmp32 = tci_qemu_ld(env, taddr, oi, tb_ptr);
63
- regs[r0] = tmp32;
64
+ do_ld_i32:
65
+ regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr);
66
break;
67
68
case INDEX_op_qemu_ld_a32_i64:
69
+ if (TCG_TARGET_REG_BITS == 64) {
70
+ tci_args_rrm(insn, &r0, &r1, &oi);
71
+ taddr = (uint32_t)regs[r1];
72
+ } else {
73
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
74
+ taddr = (uint32_t)regs[r2];
75
+ }
76
+ goto do_ld_i64;
77
case INDEX_op_qemu_ld_a64_i64:
78
if (TCG_TARGET_REG_BITS == 64) {
79
tci_args_rrm(insn, &r0, &r1, &oi);
80
taddr = regs[r1];
81
- } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
82
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
83
- taddr = regs[r2];
84
} else {
85
tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
86
taddr = tci_uint64(regs[r3], regs[r2]);
87
oi = regs[r4];
88
}
89
+ do_ld_i64:
90
tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr);
91
if (TCG_TARGET_REG_BITS == 32) {
92
tci_write_reg64(regs, r1, r0, tmp64);
93
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
94
break;
95
96
case INDEX_op_qemu_st_a32_i32:
97
+ tci_args_rrm(insn, &r0, &r1, &oi);
98
+ taddr = (uint32_t)regs[r1];
99
+ goto do_st_i32;
100
case INDEX_op_qemu_st_a64_i32:
101
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
102
+ if (TCG_TARGET_REG_BITS == 64) {
103
tci_args_rrm(insn, &r0, &r1, &oi);
104
taddr = regs[r1];
105
} else {
106
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
107
taddr = tci_uint64(regs[r2], regs[r1]);
108
}
109
- tmp32 = regs[r0];
110
- tci_qemu_st(env, taddr, tmp32, oi, tb_ptr);
111
+ do_st_i32:
112
+ tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr);
113
break;
114
115
case INDEX_op_qemu_st_a32_i64:
116
+ if (TCG_TARGET_REG_BITS == 64) {
117
+ tci_args_rrm(insn, &r0, &r1, &oi);
118
+ tmp64 = regs[r0];
119
+ taddr = (uint32_t)regs[r1];
120
+ } else {
121
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
122
+ tmp64 = tci_uint64(regs[r1], regs[r0]);
123
+ taddr = (uint32_t)regs[r2];
124
+ }
125
+ goto do_st_i64;
126
case INDEX_op_qemu_st_a64_i64:
127
if (TCG_TARGET_REG_BITS == 64) {
128
tci_args_rrm(insn, &r0, &r1, &oi);
129
- taddr = regs[r1];
130
tmp64 = regs[r0];
131
+ taddr = regs[r1];
132
} else {
133
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
134
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
135
- taddr = regs[r2];
136
- } else {
137
- tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
138
- taddr = tci_uint64(regs[r3], regs[r2]);
139
- oi = regs[r4];
140
- }
141
+ tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
142
tmp64 = tci_uint64(regs[r1], regs[r0]);
143
+ taddr = tci_uint64(regs[r3], regs[r2]);
144
+ oi = regs[r4];
145
}
146
+ do_st_i64:
147
tci_qemu_st(env, taddr, tmp64, oi, tb_ptr);
148
break;
149
150
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
151
index XXXXXXX..XXXXXXX 100644
152
--- a/tcg/tci/tcg-target.c.inc
153
+++ b/tcg/tci/tcg-target.c.inc
154
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
155
return false;
31
}
156
}
32
157
33
static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
158
-static void stack_bounds_check(TCGReg base, target_long offset)
34
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
159
+static void stack_bounds_check(TCGReg base, intptr_t offset)
35
static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
36
{
160
{
37
CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
161
if (base == TCG_REG_CALL_STACK) {
38
- size_t old_size = tlb_n_entries(env, mmu_idx);
162
tcg_debug_assert(offset >= 0);
39
+ size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
163
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
40
size_t rate;
164
break;
41
size_t new_size = old_size;
165
42
int64_t now = get_clock_realtime();
166
case INDEX_op_qemu_ld_a32_i32:
43
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
167
- case INDEX_op_qemu_ld_a64_i32:
44
env_tlb(env)->d[mmu_idx].large_page_addr = -1;
168
case INDEX_op_qemu_st_a32_i32:
45
env_tlb(env)->d[mmu_idx].large_page_mask = -1;
169
+ tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
46
env_tlb(env)->d[mmu_idx].vindex = 0;
170
+ break;
47
- memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
171
+ case INDEX_op_qemu_ld_a64_i32:
48
+ memset(env_tlb(env)->f[mmu_idx].table, -1,
172
case INDEX_op_qemu_st_a64_i32:
49
+ sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
173
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
50
memset(env_tlb(env)->d[mmu_idx].vtable, -1,
174
+ case INDEX_op_qemu_ld_a32_i64:
51
sizeof(env_tlb(env)->d[0].vtable));
175
+ case INDEX_op_qemu_st_a32_i64:
52
}
176
+ if (TCG_TARGET_REG_BITS == 64) {
53
@@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
177
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
54
qemu_spin_lock(&env_tlb(env)->c.lock);
178
} else {
55
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
179
tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
56
unsigned int i;
180
}
57
- unsigned int n = tlb_n_entries(env, mmu_idx);
181
break;
58
+ unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
182
-
59
183
- case INDEX_op_qemu_ld_a32_i64:
60
for (i = 0; i < n; i++) {
184
case INDEX_op_qemu_ld_a64_i64:
61
tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
185
- case INDEX_op_qemu_st_a32_i64:
186
case INDEX_op_qemu_st_a64_i64:
187
if (TCG_TARGET_REG_BITS == 64) {
188
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
189
- } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
190
- tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
191
} else {
192
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]);
193
tcg_out_op_rrrrr(s, opc, args[0], args[1],
62
--
194
--
63
2.20.1
195
2.34.1
64
196
65
197
New patch
1
Keep all 32-bit values zero-extended in the register, not solely when
2
addresses are 32 bits. This eliminates a dependency on TARGET_LONG_BITS.
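
A minimal standalone example of why this is cheap (an assumption about x86-64 semantics, not code from this patch): writing a 32-bit register already clears the upper half, so keeping every TCG_TYPE_I32 value zero-extended needs no extra instructions.

    #include <stdint.h>

    /* Compiles to a single 32-bit mov on x86-64; the high 32 bits are cleared. */
    uint64_t keep_zero_extended(uint64_t x)
    {
        return (uint32_t)x;
    }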
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/i386/tcg-target.h | 6 +++---
8
1 file changed, 3 insertions(+), 3 deletions(-)
9
10
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/i386/tcg-target.h
13
+++ b/tcg/i386/tcg-target.h
14
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
15
#define TCG_TARGET_HAS_mulsh_i32 0
16
17
#if TCG_TARGET_REG_BITS == 64
18
-/* Keep target addresses zero-extended in a register. */
19
-#define TCG_TARGET_HAS_extrl_i64_i32 (TARGET_LONG_BITS == 32)
20
-#define TCG_TARGET_HAS_extrh_i64_i32 (TARGET_LONG_BITS == 32)
21
+/* Keep 32-bit values zero-extended in a register. */
22
+#define TCG_TARGET_HAS_extrl_i64_i32 1
23
+#define TCG_TARGET_HAS_extrh_i64_i32 1
24
#define TCG_TARGET_HAS_div2_i64 1
25
#define TCG_TARGET_HAS_rot_i64 1
26
#define TCG_TARGET_HAS_ext8s_i64 1
27
--
28
2.34.1
29
30
1
No functional change, but the smaller expressions make
1
Since TCG_TYPE_I32 values are kept zero-extended in registers, via
2
the code easier to read.
2
omission of the REXW bit, we need not extend when the source and destination registers match.
3
This is already relied upon by qemu_{ld,st}.
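
For reference, the patched helper is effectively the following (condensed from the hunk below):

    static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
    {
        if (dest != src) {
            tcg_out_ext32u(s, dest, src);
        }
        /* else: src already holds the value zero-extended; no insn needed */
    }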
3
4
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
accel/tcg/cputlb.c | 19 ++++++++++---------
9
tcg/i386/tcg-target.c.inc | 4 +++-
10
1 file changed, 10 insertions(+), 9 deletions(-)
10
1 file changed, 3 insertions(+), 1 deletion(-)
11
11
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
14
--- a/tcg/i386/tcg-target.c.inc
15
+++ b/accel/tcg/cputlb.c
15
+++ b/tcg/i386/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
16
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
17
17
18
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
18
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
19
{
19
{
20
- tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
20
- tcg_out_ext32u(s, dest, src);
21
- env_tlb(env)->d[mmu_idx].n_used_entries = 0;
21
+ if (dest != src) {
22
- env_tlb(env)->d[mmu_idx].large_page_addr = -1;
22
+ tcg_out_ext32u(s, dest, src);
23
- env_tlb(env)->d[mmu_idx].large_page_mask = -1;
23
+ }
24
- env_tlb(env)->d[mmu_idx].vindex = 0;
25
- memset(env_tlb(env)->f[mmu_idx].table, -1,
26
- sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
27
- memset(env_tlb(env)->d[mmu_idx].vtable, -1,
28
- sizeof(env_tlb(env)->d[0].vtable));
29
+ CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
30
+ CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
31
+
32
+ tlb_mmu_resize_locked(desc, fast);
33
+ desc->n_used_entries = 0;
34
+ desc->large_page_addr = -1;
35
+ desc->large_page_mask = -1;
36
+ desc->vindex = 0;
37
+ memset(fast->table, -1, sizeof_tlb(fast));
38
+ memset(desc->vtable, -1, sizeof(desc->vtable));
39
}
24
}
40
25
41
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
26
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src)
42
--
27
--
43
2.20.1
28
2.34.1
44
29
45
30
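
A rough sketch of the consequence this patch relies on, using illustrative names only (no QEMU API involved): once every 32-bit value is kept zero-extended in its 64-bit slot, an extu whose source and destination match is a pure no-op and can be skipped.

#include <stdint.h>
#include <stdio.h>

static uint64_t regs[4];

static void write_i32(int reg, uint32_t val)
{
    regs[reg] = val;                    /* upper half cleared, as on x86-64 */
}

static void extu_i32_i64(int dest, int src)
{
    if (dest == src) {
        return;                         /* already zero-extended in place */
    }
    regs[dest] = (uint32_t)regs[src];
}

int main(void)
{
    write_i32(1, 0xcafebabeu);
    extu_i32_i64(1, 1);                 /* skipped: nothing to do */
    extu_i32_i64(0, 1);
    printf("0x%llx 0x%llx\n",
           (unsigned long long)regs[0], (unsigned long long)regs[1]);
    return 0;
}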
diff view generated by jsdifflib
New patch
1
Because of its use in tgen_arithi, this value must be a signed
2
32-bit quantity, as that is what may be encoded in the insn.
3
The truncation of the value to unsigned for 32-bit guests is
4
done via the REX bit via 'trexw'.
1
5
6
Removes the only uses of target_ulong from this tcg backend.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
tcg/i386/tcg-target.c.inc | 4 ++--
12
1 file changed, 2 insertions(+), 2 deletions(-)
13
14
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/i386/tcg-target.c.inc
17
+++ b/tcg/i386/tcg-target.c.inc
18
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
19
int trexw = 0, hrexw = 0, tlbrexw = 0;
20
unsigned mem_index = get_mmuidx(oi);
21
unsigned s_mask = (1 << s_bits) - 1;
22
- target_ulong tlb_mask;
23
+ int tlb_mask;
24
25
ldst = new_ldst_label(s);
26
ldst->is_ld = is_ld;
27
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
28
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
29
addrlo, s_mask - a_mask);
30
}
31
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
32
+ tlb_mask = TARGET_PAGE_MASK | a_mask;
33
tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
34
35
/* cmp 0(TCG_REG_L0), TCG_REG_L1 */
36
--
37
2.34.1
38
39
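
A stand-alone illustration of why a signed 32-bit quantity suffices (the 12-bit page size below is an assumption for the example, not taken from the patch): with REX.W the CPU sign-extends the 32-bit immediate to the full 64-bit mask, while without REX.W the same encoding acts as the truncated 32-bit mask.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int page_bits = 12;                      /* example value only */
    int tlb_mask = -(1 << page_bits);        /* 0xfffff000 as a signed int */

    /* With REX.W the imm32 operand is sign-extended to 64 bits. */
    uint64_t mask64 = (int64_t)tlb_mask;
    /* Without REX.W the operation is 32-bit, i.e. the mask is truncated. */
    uint32_t mask32 = (uint32_t)tlb_mask;

    printf("imm32=%#x  mask64=%#llx  mask32=%#x\n",
           (unsigned)tlb_mask, (unsigned long long)mask64, mask32);
    return 0;
}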
diff view generated by jsdifflib
New patch
1
All uses can be inferred from the INDEX_op_qemu_*_a{32,64}_* opcode
2
being used. Add a field into TCGLabelQemuLdst to record the usage.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/i386/tcg-target.c.inc | 8 +++-----
8
1 file changed, 3 insertions(+), 5 deletions(-)
9
10
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/i386/tcg-target.c.inc
13
+++ b/tcg/i386/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
15
ldst->addrhi_reg = addrhi;
16
17
if (TCG_TARGET_REG_BITS == 64) {
18
- if (TARGET_LONG_BITS == 64) {
19
- ttype = TCG_TYPE_I64;
20
- trexw = P_REXW;
21
- }
22
+ ttype = s->addr_type;
23
+ trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
24
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
25
hrexw = P_REXW;
26
if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
27
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
28
ldst->label_ptr[0] = s->code_ptr;
29
s->code_ptr += 4;
30
31
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
32
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) {
33
/* cmp 4(TCG_REG_L0), addrhi */
34
tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
35
36
--
37
2.34.1
38
39
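
The general shape of this and the following conversions, reduced to a toy sketch with made-up names (Ctx and AddrType are not QEMU types): the guest address width becomes a per-context value consulted at code-generation time instead of a compile-time test on TARGET_LONG_BITS, which is what allows a single TCG binary to serve both 32-bit and 64-bit guests.

#include <stdbool.h>

typedef enum { ADDR_I32, ADDR_I64 } AddrType;

typedef struct {
    AddrType addr_type;   /* recorded once, e.g. from the qemu_ld/st opcode */
} Ctx;

/* Before: a compile-time test, one binary per guest word size.
 *   #if TARGET_LONG_BITS == 64 ... #endif
 * After: a runtime test on the translation context. */
static bool addr_is_64bit(const Ctx *s)
{
    return s->addr_type == ADDR_I64;
}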
diff view generated by jsdifflib
New patch
1
All uses can be inferred from the INDEX_op_qemu_*_a{32,64}_*
2
opcode being used.
1
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/arm/tcg-target.c.inc | 14 +++++++-------
8
1 file changed, 7 insertions(+), 7 deletions(-)
9
10
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/arm/tcg-target.c.inc
13
+++ b/tcg/arm/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
15
* Load the tlb comparator into R2/R3 and the fast path addend into R1.
16
*/
17
if (cmp_off == 0) {
18
- if (TARGET_LONG_BITS == 64) {
19
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
20
- } else {
21
+ if (s->addr_type == TCG_TYPE_I32) {
22
tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
23
+ } else {
24
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
25
}
26
} else {
27
tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
28
TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
29
- if (TARGET_LONG_BITS == 64) {
30
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
31
- } else {
32
+ if (s->addr_type == TCG_TYPE_I32) {
33
tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
34
+ } else {
35
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
36
}
37
}
38
39
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
40
SHIFT_IMM_LSL(TARGET_PAGE_BITS));
41
}
42
43
- if (TARGET_LONG_BITS == 64) {
44
+ if (s->addr_type != TCG_TYPE_I32) {
45
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
46
}
47
#else
48
--
49
2.34.1
50
51
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
Eliminate the test vs TARGET_LONG_BITS by considering this
2
predicate to be always true, and simplify accordingly.
2
3
3
To avoid scrolling each instruction when reviewing tcg
4
helpers written for the decodetree script, display the
5
.decode files (similar to header declarations) before
6
the C source (implementation of previous declarations).
7
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
Message-Id: <20191230082856.30556-1-philmd@redhat.com>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
6
---
14
scripts/git.orderfile | 3 +++
7
tcg/aarch64/tcg-target.c.inc | 19 +++++++++----------
15
1 file changed, 3 insertions(+)
8
1 file changed, 9 insertions(+), 10 deletions(-)
16
9
17
diff --git a/scripts/git.orderfile b/scripts/git.orderfile
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
18
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
19
--- a/scripts/git.orderfile
12
--- a/tcg/aarch64/tcg-target.c.inc
20
+++ b/scripts/git.orderfile
13
+++ b/tcg/aarch64/tcg-target.c.inc
21
@@ -XXX,XX +XXX,XX @@ qga/*.json
14
@@ -XXX,XX +XXX,XX @@ bool have_lse2;
22
# headers
15
#define TCG_VEC_TMP0 TCG_REG_V31
23
*.h
16
24
17
#ifndef CONFIG_SOFTMMU
25
+# decoding tree specification
18
-/* Note that XZR cannot be encoded in the address base register slot,
26
+*.decode
19
- as that actaully encodes SP. So if we need to zero-extend the guest
27
+
20
- address, via the address index register slot, we need to load even
28
# code
21
- a zero guest base into a register. */
29
*.c
22
-#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
23
#define TCG_REG_GUEST_BASE TCG_REG_X28
24
#endif
25
26
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
27
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
28
}
29
30
- if (USE_GUEST_BASE) {
31
+ if (guest_base || addr_type == TCG_TYPE_I32) {
32
h->base = TCG_REG_GUEST_BASE;
33
h->index = addr_reg;
34
h->index_ext = addr_type;
35
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
36
CPU_TEMP_BUF_NLONGS * sizeof(long));
37
38
#if !defined(CONFIG_SOFTMMU)
39
- if (USE_GUEST_BASE) {
40
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
41
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
42
- }
43
+ /*
44
+ * Note that XZR cannot be encoded in the address base register slot,
45
+ * as that actually encodes SP. Depending on the guest, we may need
46
+ * to zero-extend the guest address via the address index register slot,
47
+ * therefore we need to load even a zero guest base into a register.
48
+ */
49
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
50
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
51
#endif
52
53
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
30
--
54
--
31
2.20.1
55
2.34.1
32
56
33
57
diff view generated by jsdifflib
1
The result of g_strsplit is never NULL.
1
All uses replaced with TCGContext.addr_type.
2
2
3
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
vl.c | 2 +-
6
tcg/aarch64/tcg-target.c.inc | 11 +++++------
10
1 file changed, 1 insertion(+), 1 deletion(-)
7
1 file changed, 5 insertions(+), 6 deletions(-)
11
8
12
diff --git a/vl.c b/vl.c
9
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/vl.c
11
--- a/tcg/aarch64/tcg-target.c.inc
15
+++ b/vl.c
12
+++ b/tcg/aarch64/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
13
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
17
14
TCGReg addr_reg, MemOpIdx oi,
18
accel_list = g_strsplit(accel, ":", 0);
15
bool is_ld)
19
16
{
20
- for (tmp = accel_list; tmp && *tmp; tmp++) {
17
- TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
21
+ for (tmp = accel_list; *tmp; tmp++) {
18
+ TCGType addr_type = s->addr_type;
22
/*
19
TCGLabelQemuLdst *ldst = NULL;
23
* Filter invalid accelerators here, to prevent obscenities
20
MemOp opc = get_memop(oi);
24
* such as "-machine accel=tcg,,thread=single".
21
MemOp s_bits = opc & MO_SIZE;
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
23
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
24
25
/* Load the tlb comparator into X0, and the fast path addend into X1. */
26
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
27
+ tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
28
is_ld ? offsetof(CPUTLBEntry, addr_read)
29
: offsetof(CPUTLBEntry, addr_write));
30
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
31
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
32
if (a_mask >= s_mask) {
33
x3 = addr_reg;
34
} else {
35
- tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
36
+ tcg_out_insn(s, 3401, ADDI, addr_type,
37
TCG_REG_X3, addr_reg, s_mask - a_mask);
38
x3 = TCG_REG_X3;
39
}
40
compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
41
42
/* Store the page mask part of the address into X3. */
43
- tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
44
- TCG_REG_X3, x3, compare_mask);
45
+ tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
46
47
/* Perform the address comparison. */
48
- tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
49
+ tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);
50
51
/* If not equal, we jump to the slow path. */
52
ldst->label_ptr[0] = s->code_ptr;
25
--
53
--
26
2.20.1
54
2.34.1
27
55
28
56
diff view generated by jsdifflib
1
The accel_list and tmp variables are only used when manufacturing
1
All uses replaced with TCGContext.addr_type.
2
-machine accel options based on -accel.
3
2
4
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
vl.c | 3 ++-
6
tcg/loongarch64/tcg-target.c.inc | 9 +++++----
10
1 file changed, 2 insertions(+), 1 deletion(-)
7
1 file changed, 5 insertions(+), 4 deletions(-)
11
8
12
diff --git a/vl.c b/vl.c
9
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/vl.c
11
--- a/tcg/loongarch64/tcg-target.c.inc
15
+++ b/vl.c
12
+++ b/tcg/loongarch64/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp)
13
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
17
static void configure_accelerators(const char *progname)
14
TCGReg addr_reg, MemOpIdx oi,
15
bool is_ld)
18
{
16
{
19
const char *accel;
17
+ TCGType addr_type = s->addr_type;
20
- char **accel_list, **tmp;
18
TCGLabelQemuLdst *ldst = NULL;
21
bool init_failed = false;
19
MemOp opc = get_memop(oi);
22
20
MemOp a_bits;
23
qemu_opts_foreach(qemu_find_opts("icount"),
21
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
24
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
22
tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
25
23
26
accel = qemu_opt_get(qemu_get_machine_opts(), "accel");
24
/* Load the tlb comparator and the addend. */
27
if (QTAILQ_EMPTY(&qemu_accel_opts.head)) {
25
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
28
+ char **accel_list, **tmp;
26
+ tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
29
+
27
is_ld ? offsetof(CPUTLBEntry, addr_read)
30
if (accel == NULL) {
28
: offsetof(CPUTLBEntry, addr_write));
31
/* Select the default accelerator */
29
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
32
if (!accel_find("tcg") && !accel_find("kvm")) {
30
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
31
if (a_bits < s_bits) {
32
unsigned a_mask = (1u << a_bits) - 1;
33
unsigned s_mask = (1u << s_bits) - 1;
34
- tcg_out_addi(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
35
+ tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
36
} else {
37
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg);
38
+ tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
39
}
40
tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
41
a_bits, TARGET_PAGE_BITS - 1);
42
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
43
h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
44
#endif
45
46
- if (TARGET_LONG_BITS == 32) {
47
+ if (addr_type == TCG_TYPE_I32) {
48
h->base = TCG_REG_TMP0;
49
tcg_out_ext32u(s, h->base, addr_reg);
50
} else {
33
--
51
--
34
2.20.1
52
2.34.1
35
53
36
54
diff view generated by jsdifflib
1
The accel_initialised variable no longer has any setters.
1
All uses replaced with TCGContext.addr_type.
2
2
3
Fixes: 6f6e1698a68c
4
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
5
---
10
vl.c | 3 +--
6
tcg/mips/tcg-target.c.inc | 42 +++++++++++++++++++++------------------
11
1 file changed, 1 insertion(+), 2 deletions(-)
7
1 file changed, 23 insertions(+), 19 deletions(-)
12
8
13
diff --git a/vl.c b/vl.c
9
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
14
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
15
--- a/vl.c
11
--- a/tcg/mips/tcg-target.c.inc
16
+++ b/vl.c
12
+++ b/tcg/mips/tcg-target.c.inc
17
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
13
@@ -XXX,XX +XXX,XX @@ typedef enum {
14
/* Aliases for convenience. */
15
ALIAS_PADD = sizeof(void *) == 4 ? OPC_ADDU : OPC_DADDU,
16
ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
17
- ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
18
- ? OPC_SRL : OPC_DSRL,
19
- ALIAS_TADDI = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
20
- ? OPC_ADDIU : OPC_DADDIU,
21
} MIPSInsn;
22
23
/*
24
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
25
TCGReg addrlo, TCGReg addrhi,
26
MemOpIdx oi, bool is_ld)
18
{
27
{
19
const char *accel;
28
+ TCGType addr_type = s->addr_type;
20
char **accel_list, **tmp;
29
TCGLabelQemuLdst *ldst = NULL;
21
- bool accel_initialised = false;
30
MemOp opc = get_memop(oi);
22
bool init_failed = false;
31
MemOp a_bits;
23
32
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
24
qemu_opts_foreach(qemu_find_opts("icount"),
33
tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
25
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
34
26
35
/* Extract the TLB index from the address into TMP3. */
27
accel_list = g_strsplit(accel, ":", 0);
36
- tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrlo,
28
37
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
29
- for (tmp = accel_list; !accel_initialised && tmp && *tmp; tmp++) {
38
+ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
30
+ for (tmp = accel_list; tmp && *tmp; tmp++) {
39
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
31
/*
40
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
32
* Filter invalid accelerators here, to prevent obscenities
41
+ } else {
33
* such as "-machine accel=tcg,,thread=single".
42
+ tcg_out_dsrl(s, TCG_TMP3, addrlo,
43
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
44
+ }
45
tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
46
47
/* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
48
tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
49
50
- /* Load the (low-half) tlb comparator. */
51
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
52
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
53
- } else {
54
- tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_TMP3, cmp_off);
55
- }
56
-
57
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
58
+ if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
59
+ /* Load the tlb comparator. */
60
+ tcg_out_ld(s, addr_type, TCG_TMP0, TCG_TMP3, cmp_off);
61
/* Load the tlb addend for the fast path. */
62
tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
63
+ } else {
64
+ /* Load the low half of the tlb comparator. */
65
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
66
}
67
68
/*
69
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
70
* For unaligned accesses, compare against the end of the access to
71
* verify that it does not cross a page boundary.
72
*/
73
- tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
74
+ tcg_out_movi(s, addr_type, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
75
if (a_mask < s_mask) {
76
- tcg_out_opc_imm(s, ALIAS_TADDI, TCG_TMP2, addrlo, s_mask - a_mask);
77
+ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
78
+ tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
79
+ } else {
80
+ tcg_out_opc_imm(s, OPC_DADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
81
+ }
82
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
83
} else {
84
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
85
}
86
87
/* Zero extend a 32-bit guest address for a 64-bit host. */
88
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
89
+ if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
90
tcg_out_ext32u(s, TCG_TMP2, addrlo);
91
addrlo = TCG_TMP2;
92
}
93
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
94
tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
95
96
/* Load and test the high half tlb comparator. */
97
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
98
+ if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
99
/* delay slot */
100
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
101
102
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
103
}
104
105
base = addrlo;
106
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
107
+ if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
108
tcg_out_ext32u(s, TCG_REG_A0, base);
109
base = TCG_REG_A0;
110
}
34
--
111
--
35
2.20.1
112
2.34.1
36
113
37
114
diff view generated by jsdifflib
1
From: Carlos Santos <casantos@redhat.com>
1
All uses replaced with TCGContext.addr_type.
2
2
3
uClibc defines _SC_LEVEL1_ICACHE_LINESIZE and _SC_LEVEL1_DCACHE_LINESIZE
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
but the corresponding sysconf calls returns -1, which is a valid result,
5
meaning that the limit is indeterminate.
6
7
Handle this situation using the fallback values instead of crashing due
8
to an assertion failure.
9
10
Signed-off-by: Carlos Santos <casantos@redhat.com>
11
Message-Id: <20191017123713.30192-1-casantos@redhat.com>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
5
---
14
util/cacheinfo.c | 10 ++++++++--
6
tcg/tcg.c | 27 ++++++++++++++-------------
15
1 file changed, 8 insertions(+), 2 deletions(-)
7
1 file changed, 14 insertions(+), 13 deletions(-)
16
8
17
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
9
diff --git a/tcg/tcg.c b/tcg/tcg.c
18
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
19
--- a/util/cacheinfo.c
11
--- a/tcg/tcg.c
20
+++ b/util/cacheinfo.c
12
+++ b/tcg/tcg.c
21
@@ -XXX,XX +XXX,XX @@ static void sys_cache_info(int *isize, int *dsize)
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
22
static void sys_cache_info(int *isize, int *dsize)
14
next_arg = 1;
23
{
15
24
# ifdef _SC_LEVEL1_ICACHE_LINESIZE
16
loc = &info->in[next_arg];
25
- *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
17
- if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
26
+ int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
18
- nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
27
+ if (tmp_isize > 0) {
19
- ldst->addrlo_reg, ldst->addrhi_reg);
28
+ *isize = tmp_isize;
20
- tcg_out_helper_load_slots(s, nmov, mov, parm);
29
+ }
21
- next_arg += nmov;
30
# endif
22
- } else {
31
# ifdef _SC_LEVEL1_DCACHE_LINESIZE
23
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
32
- *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
24
/*
33
+ int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
25
* 32-bit host with 32-bit guest: zero-extend the guest address
34
+ if (tmp_dsize > 0) {
26
* to 64-bits for the helper by storing the low part, then
35
+ *dsize = tmp_dsize;
27
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
36
+ }
28
tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
37
# endif
29
TCG_TYPE_I32, 0, parm);
38
}
30
next_arg += 2;
39
#endif /* sys_cache_info */
31
+ } else {
32
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
33
+ ldst->addrlo_reg, ldst->addrhi_reg);
34
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
35
+ next_arg += nmov;
36
}
37
38
switch (info->out_kind) {
39
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
40
41
/* Handle addr argument. */
42
loc = &info->in[next_arg];
43
- if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
44
- n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
45
- ldst->addrlo_reg, ldst->addrhi_reg);
46
- next_arg += n;
47
- nmov += n;
48
- } else {
49
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
50
/*
51
* 32-bit host with 32-bit guest: zero-extend the guest address
52
* to 64-bits for the helper by storing the low part. Later,
53
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
54
ldst->addrlo_reg, -1);
55
next_arg += 2;
56
nmov += 1;
57
+ } else {
58
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
59
+ ldst->addrlo_reg, ldst->addrhi_reg);
60
+ next_arg += n;
61
+ nmov += n;
62
}
63
64
/* Handle data argument. */
65
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
66
g_assert_not_reached();
67
}
68
69
- if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32) {
70
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
71
+ /* Zero extend the address by loading a zero for the high part. */
72
loc = &info->in[1 + !HOST_BIG_ENDIAN];
73
tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
74
}
40
--
75
--
41
2.20.1
76
2.34.1
42
77
43
78
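
For the uClibc cacheinfo fix quoted above, the key point is that sysconf() may legitimately return -1 to mean "indeterminate" rather than "error". A stand-alone sketch of the defensive pattern (the 64-byte fallback is an assumption for this example only):

#include <unistd.h>
#include <stdio.h>

int main(void)
{
    long linesize = 0;

#ifdef _SC_LEVEL1_DCACHE_LINESIZE
    /* May return -1 to report that the limit is indeterminate. */
    linesize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
#endif
    if (linesize <= 0) {
        linesize = 64;   /* fallback value, assumed for the example */
    }
    printf("using dcache line size: %ld bytes\n", linesize);
    return 0;
}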
diff view generated by jsdifflib
New patch
1
Disconnect guest page size from TCG compilation.
2
While this could be done via exec/target_page.h, we want to cache
3
the value across multiple memory access operations, so we might
4
as well initialize this early.
1
5
6
The changes within tcg/ are entirely mechanical:
7
8
sed -i s/TARGET_PAGE_BITS/s->page_bits/g
9
sed -i s/TARGET_PAGE_MASK/s->page_mask/g
10
11
Reviewed-by: Anton Johansson <anjo@rev.ng>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
14
include/tcg/tcg.h | 5 +++++
15
accel/tcg/translate-all.c | 4 ++++
16
tcg/aarch64/tcg-target.c.inc | 6 +++---
17
tcg/arm/tcg-target.c.inc | 10 +++++-----
18
tcg/i386/tcg-target.c.inc | 6 +++---
19
tcg/loongarch64/tcg-target.c.inc | 4 ++--
20
tcg/mips/tcg-target.c.inc | 6 +++---
21
tcg/ppc/tcg-target.c.inc | 14 +++++++-------
22
tcg/riscv/tcg-target.c.inc | 4 ++--
23
tcg/s390x/tcg-target.c.inc | 4 ++--
24
tcg/sparc64/tcg-target.c.inc | 4 ++--
25
11 files changed, 38 insertions(+), 29 deletions(-)
26
27
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/include/tcg/tcg.h
30
+++ b/include/tcg/tcg.h
31
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
32
int nb_ops;
33
TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */
34
35
+#ifdef CONFIG_SOFTMMU
36
+ int page_mask;
37
+ uint8_t page_bits;
38
+#endif
39
+
40
TCGRegSet reserved_regs;
41
intptr_t current_frame_offset;
42
intptr_t frame_start;
43
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/accel/tcg/translate-all.c
46
+++ b/accel/tcg/translate-all.c
47
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
48
tb_set_page_addr1(tb, -1);
49
tcg_ctx->gen_tb = tb;
50
tcg_ctx->addr_type = TCG_TYPE_TL;
51
+#ifdef CONFIG_SOFTMMU
52
+ tcg_ctx->page_bits = TARGET_PAGE_BITS;
53
+ tcg_ctx->page_mask = TARGET_PAGE_MASK;
54
+#endif
55
56
tb_overflow:
57
58
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
59
index XXXXXXX..XXXXXXX 100644
60
--- a/tcg/aarch64/tcg-target.c.inc
61
+++ b/tcg/aarch64/tcg-target.c.inc
62
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
63
ldst->oi = oi;
64
ldst->addrlo_reg = addr_reg;
65
66
- mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
67
+ mask_type = (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32
68
? TCG_TYPE_I64 : TCG_TYPE_I32);
69
70
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
71
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
72
/* Extract the TLB index from the address into X0. */
73
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
74
TCG_REG_X0, TCG_REG_X0, addr_reg,
75
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
76
+ s->page_bits - CPU_TLB_ENTRY_BITS);
77
78
/* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
79
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
80
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
81
TCG_REG_X3, addr_reg, s_mask - a_mask);
82
x3 = TCG_REG_X3;
83
}
84
- compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
85
+ compare_mask = (uint64_t)s->page_mask | a_mask;
86
87
/* Store the page mask part of the address into X3. */
88
tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
89
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
90
index XXXXXXX..XXXXXXX 100644
91
--- a/tcg/arm/tcg-target.c.inc
92
+++ b/tcg/arm/tcg-target.c.inc
93
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
94
95
/* Extract the tlb index from the address into R0. */
96
tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
97
- SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
98
+ SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS));
99
100
/*
101
* Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
102
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
103
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
104
addrlo, s_mask - a_mask);
105
}
106
- if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
107
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
108
+ if (use_armv7_instructions && s->page_bits <= 16) {
109
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask));
110
tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
111
t_addr, TCG_REG_TMP, 0);
112
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
113
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
114
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
115
}
116
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
117
- SHIFT_IMM_LSR(TARGET_PAGE_BITS));
118
+ SHIFT_IMM_LSR(s->page_bits));
119
tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
120
0, TCG_REG_R2, TCG_REG_TMP,
121
- SHIFT_IMM_LSL(TARGET_PAGE_BITS));
122
+ SHIFT_IMM_LSL(s->page_bits));
123
}
124
125
if (s->addr_type != TCG_TYPE_I32) {
126
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
127
index XXXXXXX..XXXXXXX 100644
128
--- a/tcg/i386/tcg-target.c.inc
129
+++ b/tcg/i386/tcg-target.c.inc
130
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
131
trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
132
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
133
hrexw = P_REXW;
134
- if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
135
+ if (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32) {
136
tlbtype = TCG_TYPE_I64;
137
tlbrexw = P_REXW;
138
}
139
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
140
141
tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
142
tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
143
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
144
+ s->page_bits - CPU_TLB_ENTRY_BITS);
145
146
tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
147
TLB_MASK_TABLE_OFS(mem_index) +
148
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
149
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
150
addrlo, s_mask - a_mask);
151
}
152
- tlb_mask = TARGET_PAGE_MASK | a_mask;
153
+ tlb_mask = s->page_mask | a_mask;
154
tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
155
156
/* cmp 0(TCG_REG_L0), TCG_REG_L1 */
157
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
158
index XXXXXXX..XXXXXXX 100644
159
--- a/tcg/loongarch64/tcg-target.c.inc
160
+++ b/tcg/loongarch64/tcg-target.c.inc
161
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
162
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
163
164
tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
165
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
166
+ s->page_bits - CPU_TLB_ENTRY_BITS);
167
tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
168
tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
169
170
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
171
tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
172
}
173
tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
174
- a_bits, TARGET_PAGE_BITS - 1);
175
+ a_bits, s->page_bits - 1);
176
177
/* Compare masked address with the TLB entry. */
178
ldst->label_ptr[0] = s->code_ptr;
179
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
180
index XXXXXXX..XXXXXXX 100644
181
--- a/tcg/mips/tcg-target.c.inc
182
+++ b/tcg/mips/tcg-target.c.inc
183
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
184
/* Extract the TLB index from the address into TMP3. */
185
if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
186
tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
187
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
188
+ s->page_bits - CPU_TLB_ENTRY_BITS);
189
} else {
190
tcg_out_dsrl(s, TCG_TMP3, addrlo,
191
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
192
+ s->page_bits - CPU_TLB_ENTRY_BITS);
193
}
194
tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
195
196
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
197
* For unaligned accesses, compare against the end of the access to
198
* verify that it does not cross a page boundary.
199
*/
200
- tcg_out_movi(s, addr_type, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
201
+ tcg_out_movi(s, addr_type, TCG_TMP1, s->page_mask | a_mask);
202
if (a_mask < s_mask) {
203
if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
204
tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
205
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
206
index XXXXXXX..XXXXXXX 100644
207
--- a/tcg/ppc/tcg-target.c.inc
208
+++ b/tcg/ppc/tcg-target.c.inc
209
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
210
/* Extract the page index, shifted into place for tlb index. */
211
if (TCG_TARGET_REG_BITS == 32) {
212
tcg_out_shri32(s, TCG_REG_R0, addrlo,
213
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
214
+ s->page_bits - CPU_TLB_ENTRY_BITS);
215
} else {
216
tcg_out_shri64(s, TCG_REG_R0, addrlo,
217
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
218
+ s->page_bits - CPU_TLB_ENTRY_BITS);
219
}
220
tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
221
222
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
223
a_bits = s_bits;
224
}
225
tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
226
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
227
+ (32 - a_bits) & 31, 31 - s->page_bits);
228
} else {
229
TCGReg t = addrlo;
230
231
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
232
/* Mask the address for the requested alignment. */
233
if (TARGET_LONG_BITS == 32) {
234
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
235
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
236
+ (32 - a_bits) & 31, 31 - s->page_bits);
237
} else if (a_bits == 0) {
238
- tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
239
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
240
} else {
241
tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
242
- 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
243
- tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
244
+ 64 - s->page_bits, s->page_bits - a_bits);
245
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
246
}
247
}
248
249
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
250
index XXXXXXX..XXXXXXX 100644
251
--- a/tcg/riscv/tcg-target.c.inc
252
+++ b/tcg/riscv/tcg-target.c.inc
253
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
254
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
255
256
tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
257
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
258
+ s->page_bits - CPU_TLB_ENTRY_BITS);
259
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
260
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
261
262
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
263
tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
264
addr_adj, addr_reg, s_mask - a_mask);
265
}
266
- compare_mask = TARGET_PAGE_MASK | a_mask;
267
+ compare_mask = s->page_mask | a_mask;
268
if (compare_mask == sextreg(compare_mask, 0, 12)) {
269
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
270
} else {
271
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
272
index XXXXXXX..XXXXXXX 100644
273
--- a/tcg/s390x/tcg-target.c.inc
274
+++ b/tcg/s390x/tcg-target.c.inc
275
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
276
ldst->addrlo_reg = addr_reg;
277
278
tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
279
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
280
+ s->page_bits - CPU_TLB_ENTRY_BITS);
281
282
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
283
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
284
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
285
* cross pages using the address of the last byte of the access.
286
*/
287
a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
288
- tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
289
+ tlb_mask = (uint64_t)s->page_mask | a_mask;
290
if (a_off == 0) {
291
tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
292
} else {
293
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
294
index XXXXXXX..XXXXXXX 100644
295
--- a/tcg/sparc64/tcg-target.c.inc
296
+++ b/tcg/sparc64/tcg-target.c.inc
297
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
298
299
/* Extract the page index, shifted into place for tlb index. */
300
tcg_out_arithi(s, TCG_REG_T1, addr_reg,
301
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
302
+ s->page_bits - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
303
tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T2, ARITH_AND);
304
305
/* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */
306
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
307
h->base = TCG_REG_T1;
308
309
/* Mask out the page offset, except for the required alignment. */
310
- compare_mask = TARGET_PAGE_MASK | a_mask;
311
+ compare_mask = s->page_mask | a_mask;
312
if (check_fit_tl(compare_mask, 13)) {
313
tcg_out_arithi(s, TCG_REG_T3, addr_reg, compare_mask, ARITH_AND);
314
} else {
315
--
316
2.34.1
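
A quick sanity check of how the two cached fields are used by the backends above, e.g. "mask out the page offset" (the 4 KiB page size is assumed purely for illustration, not taken from the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint8_t  page_bits = 12;                  /* example: 4 KiB pages */
    int      page_mask = -(1 << page_bits);   /* same as ~(page_size - 1) */
    uint64_t addr = 0x12345678;

    printf("page=%#llx offset=%#llx\n",
           (unsigned long long)(addr & (uint64_t)page_mask),
           (unsigned long long)(addr & ~(uint64_t)page_mask));
    return 0;
}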
diff view generated by jsdifflib
New patch
1
Disconnect guest tlb parameters from TCG compilation.
1
2
3
Reviewed-by: Anton Johansson <anjo@rev.ng>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
include/tcg/tcg.h | 1 +
7
accel/tcg/translate-all.c | 1 +
8
tcg/aarch64/tcg-target.c.inc | 2 +-
9
tcg/i386/tcg-target.c.inc | 2 +-
10
4 files changed, 4 insertions(+), 2 deletions(-)
11
12
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/tcg/tcg.h
15
+++ b/include/tcg/tcg.h
16
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
17
#ifdef CONFIG_SOFTMMU
18
int page_mask;
19
uint8_t page_bits;
20
+ uint8_t tlb_dyn_max_bits;
21
#endif
22
23
TCGRegSet reserved_regs;
24
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/accel/tcg/translate-all.c
27
+++ b/accel/tcg/translate-all.c
28
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
29
#ifdef CONFIG_SOFTMMU
30
tcg_ctx->page_bits = TARGET_PAGE_BITS;
31
tcg_ctx->page_mask = TARGET_PAGE_MASK;
32
+ tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
33
#endif
34
35
tb_overflow:
36
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
37
index XXXXXXX..XXXXXXX 100644
38
--- a/tcg/aarch64/tcg-target.c.inc
39
+++ b/tcg/aarch64/tcg-target.c.inc
40
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
41
ldst->oi = oi;
42
ldst->addrlo_reg = addr_reg;
43
44
- mask_type = (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32
45
+ mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
46
? TCG_TYPE_I64 : TCG_TYPE_I32);
47
48
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
49
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
50
index XXXXXXX..XXXXXXX 100644
51
--- a/tcg/i386/tcg-target.c.inc
52
+++ b/tcg/i386/tcg-target.c.inc
53
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
54
trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
55
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
56
hrexw = P_REXW;
57
- if (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32) {
58
+ if (s->page_bits + s->tlb_dyn_max_bits > 32) {
59
tlbtype = TCG_TYPE_I64;
60
tlbrexw = P_REXW;
61
}
62
--
63
2.34.1
diff view generated by jsdifflib
New patch
1
TCG will need this declaration, without all of the other
2
bits that come with cpu-all.h.
1
3
4
Reviewed-by: Thomas Huth <thuth@redhat.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
include/exec/cpu-all.h | 5 +----
8
include/exec/user/guest-base.h | 12 ++++++++++++
9
tcg/tcg.c | 3 +++
10
3 files changed, 16 insertions(+), 4 deletions(-)
11
create mode 100644 include/exec/user/guest-base.h
12
13
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/exec/cpu-all.h
16
+++ b/include/exec/cpu-all.h
17
@@ -XXX,XX +XXX,XX @@
18
19
#if defined(CONFIG_USER_ONLY)
20
#include "exec/user/abitypes.h"
21
+#include "exec/user/guest-base.h"
22
23
-/* On some host systems the guest address space is reserved on the host.
24
- * This allows the guest address space to be offset to a convenient location.
25
- */
26
-extern uintptr_t guest_base;
27
extern bool have_guest_base;
28
29
/*
30
diff --git a/include/exec/user/guest-base.h b/include/exec/user/guest-base.h
31
new file mode 100644
32
index XXXXXXX..XXXXXXX
33
--- /dev/null
34
+++ b/include/exec/user/guest-base.h
35
@@ -XXX,XX +XXX,XX @@
36
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
37
+/*
38
+ * Declaration of guest_base.
39
+ * Copyright (c) 2003 Fabrice Bellard
40
+ */
41
+
42
+#ifndef EXEC_USER_GUEST_BASE_H
43
+#define EXEC_USER_GUEST_BASE_H
44
+
45
+extern uintptr_t guest_base;
46
+
47
+#endif
48
diff --git a/tcg/tcg.c b/tcg/tcg.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/tcg/tcg.c
51
+++ b/tcg/tcg.c
52
@@ -XXX,XX +XXX,XX @@
53
#include "tcg/tcg-temp-internal.h"
54
#include "tcg-internal.h"
55
#include "accel/tcg/perf.h"
56
+#ifdef CONFIG_USER_ONLY
57
+#include "exec/user/guest-base.h"
58
+#endif
59
60
/* Forward declarations for functions declared in tcg-target.c.inc and
61
used here. */
62
--
63
2.34.1
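
As the comment removed from cpu-all.h notes, guest_base lets a reserved guest address space be offset to a convenient location in the host. A toy sketch of the translation it enables (guest_addr_to_host is a made-up name, not the QEMU helper, and guest_base is defined locally here only to keep the sketch self-contained):

#include <stdint.h>

uintptr_t guest_base;   /* in a real build, set up by the user-mode front end */

static inline void *guest_addr_to_host(uint64_t guest_addr)
{
    /* Offset the guest virtual address into the host mapping. */
    return (void *)(uintptr_t)(guest_addr + guest_base);
}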
diff view generated by jsdifflib