The following changes since commit 8844bb8d896595ee1d25d21c770e6e6f29803097:

  Merge tag 'or1k-pull-request-20230513' of https://github.com/stffrdhrn/qemu into staging (2023-05-13 11:23:14 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230516

for you to fetch changes up to ee95d036bf4bfa10be65325a287bf3d0e8b2a0e6:

  tcg: Split out exec/user/guest-base.h (2023-05-16 08:11:53 -0700)

----------------------------------------------------------------
tcg/i386: Fix tcg_out_addi_ptr for win64
tcg: Implement atomicity for TCGv_i128
tcg: First quarter of cleanups for building tcg once

----------------------------------------------------------------
Richard Henderson (80):
      tcg/i386: Set P_REXW in tcg_out_addi_ptr
      include/exec/memop: Add MO_ATOM_*
      accel/tcg: Honor atomicity of loads
      accel/tcg: Honor atomicity of stores
      tcg: Unify helper_{be,le}_{ld,st}*
      accel/tcg: Implement helper_{ld,st}*_mmu for user-only
      tcg/tci: Use helper_{ld,st}*_mmu for user-only
      tcg: Add 128-bit guest memory primitives
      meson: Detect atomic128 support with optimization
      tcg/i386: Add have_atomic16
      tcg/aarch64: Detect have_lse, have_lse2 for linux
      tcg/aarch64: Detect have_lse, have_lse2 for darwin
      tcg/i386: Use full load/store helpers in user-only mode
      tcg/aarch64: Use full load/store helpers in user-only mode
      tcg/ppc: Use full load/store helpers in user-only mode
      tcg/loongarch64: Use full load/store helpers in user-only mode
      tcg/riscv: Use full load/store helpers in user-only mode
      tcg/arm: Adjust constraints on qemu_ld/st
      tcg/arm: Use full load/store helpers in user-only mode
      tcg/mips: Use full load/store helpers in user-only mode
      tcg/s390x: Use full load/store helpers in user-only mode
      tcg/sparc64: Allocate %g2 as a third temporary
      tcg/sparc64: Rename tcg_out_movi_imm13 to tcg_out_movi_s13
      target/sparc64: Remove tcg_out_movi_s13 case from tcg_out_movi_imm32
      tcg/sparc64: Rename tcg_out_movi_imm32 to tcg_out_movi_u32
      tcg/sparc64: Split out tcg_out_movi_s32
      tcg/sparc64: Use standard slow path for softmmu
      accel/tcg: Remove helper_unaligned_{ld,st}
      tcg/loongarch64: Check the host supports unaligned accesses
      tcg/loongarch64: Support softmmu unaligned accesses
      tcg/riscv: Support softmmu unaligned accesses
      tcg: Introduce tcg_target_has_memory_bswap
      tcg: Add INDEX_op_qemu_{ld,st}_i128
      tcg: Introduce tcg_out_movext3
      tcg: Merge tcg_out_helper_load_regs into caller
      tcg: Support TCG_TYPE_I128 in tcg_out_{ld,st}_helper_{args,ret}
      tcg: Introduce atom_and_align_for_opc
      tcg/i386: Use atom_and_align_for_opc
      tcg/aarch64: Use atom_and_align_for_opc
      tcg/arm: Use atom_and_align_for_opc
      tcg/loongarch64: Use atom_and_align_for_opc
      tcg/mips: Use atom_and_align_for_opc
      tcg/ppc: Use atom_and_align_for_opc
      tcg/riscv: Use atom_and_align_for_opc
      tcg/s390x: Use atom_and_align_for_opc
      tcg/sparc64: Use atom_and_align_for_opc
      tcg/i386: Honor 64-bit atomicity in 32-bit mode
      tcg/i386: Support 128-bit load/store with have_atomic16
      tcg/aarch64: Rename temporaries
      tcg/aarch64: Support 128-bit load/store
      tcg/ppc: Support 128-bit load/store
      tcg/s390x: Support 128-bit load/store
      tcg: Split out memory ops to tcg-op-ldst.c
      tcg: Widen gen_insn_data to uint64_t
      accel/tcg: Widen tcg-ldst.h addresses to uint64_t
      tcg: Widen helper_{ld,st}_i128 addresses to uint64_t
      tcg: Widen helper_atomic_* addresses to uint64_t
      tcg: Widen tcg_gen_code pc_start argument to uint64_t
      accel/tcg: Merge gen_mem_wrapped with plugin_gen_empty_mem_callback
      accel/tcg: Merge do_gen_mem_cb into caller
      tcg: Reduce copies for plugin_gen_mem_callbacks
      accel/tcg: Widen plugin_gen_empty_mem_callback to i64
      tcg: Add addr_type to TCGContext
      tcg: Remove TCGv from tcg_gen_qemu_{ld,st}_*
      tcg: Remove TCGv from tcg_gen_atomic_*
      tcg: Split INDEX_op_qemu_{ld,st}* for guest address size
      tcg/tci: Elimnate TARGET_LONG_BITS, target_ulong
      tcg/i386: Always enable TCG_TARGET_HAS_extr[lh]_i64_i32
      tcg/i386: Conditionalize tcg_out_extu_i32_i64
      tcg/i386: Adjust type of tlb_mask
      tcg/i386: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/arm: Remove TARGET_LONG_BITS
      tcg/aarch64: Remove USE_GUEST_BASE
      tcg/aarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/loongarch64: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg/mips: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg: Remove TARGET_LONG_BITS, TCG_TYPE_TL
      tcg: Add page_bits and page_mask to TCGContext
      tcg: Add tlb_dyn_max_bits to TCGContext
      tcg: Split out exec/user/guest-base.h

 docs/devel/loads-stores.rst      |   36 +-
 docs/devel/tcg-ops.rst           |   11 +-
 meson.build                      |   52 +-
 accel/tcg/tcg-runtime.h          |   49 +-
 include/exec/cpu-all.h           |    5 +-
 include/exec/memop.h             |   37 ++
 include/exec/plugin-gen.h        |    4 +-
 include/exec/user/guest-base.h   |   12 +
 include/qemu/cpuid.h             |   18 +
 include/tcg/tcg-ldst.h           |   72 +--
 include/tcg/tcg-op.h             |  273 ++++++---
 include/tcg/tcg-opc.h            |   41 +-
 include/tcg/tcg.h                |   39 +-
 tcg/aarch64/tcg-target-con-set.h |    2 +
 tcg/aarch64/tcg-target.h         |   15 +-
 tcg/arm/tcg-target-con-set.h     |   16 +-
 tcg/arm/tcg-target-con-str.h     |    5 +-
 tcg/arm/tcg-target.h             |    3 +-
 tcg/i386/tcg-target.h            |   13 +-
 tcg/loongarch64/tcg-target.h     |    3 +-
 tcg/mips/tcg-target.h            |    4 +-
 tcg/ppc/tcg-target-con-set.h     |    2 +
 tcg/ppc/tcg-target-con-str.h     |    1 +
 tcg/ppc/tcg-target.h             |    4 +-
 tcg/riscv/tcg-target.h           |    4 +-
 tcg/s390x/tcg-target-con-set.h   |    2 +
 tcg/s390x/tcg-target.h           |    4 +-
 tcg/sparc64/tcg-target-con-set.h |    2 -
 tcg/sparc64/tcg-target-con-str.h |    1 -
 tcg/sparc64/tcg-target.h         |    4 +-
 tcg/tcg-internal.h               |    2 +
 tcg/tci/tcg-target.h             |    4 +-
 accel/tcg/cputlb.c               |  839 ++++++++++++++++---------
 accel/tcg/plugin-gen.c           |   68 +-
 accel/tcg/translate-all.c        |   35 +-
 accel/tcg/user-exec.c            |  488 ++++++++++-----
 tcg/optimize.c                   |   19 +-
 tcg/tcg-op-ldst.c                | 1234 +++++++++++++++++++++++++++++++++++++
 tcg/tcg-op.c                     |  864 --------------------------
 tcg/tcg.c                        |  627 +++++++++++++++----
 tcg/tci.c                        |  243 +++-----
 accel/tcg/atomic_common.c.inc    |   14 +-
 accel/tcg/ldst_atomicity.c.inc   | 1262 ++++++++++++++++++++++++++++++++++++++
 tcg/aarch64/tcg-target.c.inc     |  438 ++++++++-----
 tcg/arm/tcg-target.c.inc         |  246 +++-----
 tcg/i386/tcg-target.c.inc        |  467 ++++++++----
 tcg/loongarch64/tcg-target.c.inc |  123 ++--
 tcg/mips/tcg-target.c.inc        |  216 +++----
 tcg/ppc/tcg-target.c.inc         |  300 +++++----
 tcg/riscv/tcg-target.c.inc       |  161 ++---
 tcg/s390x/tcg-target.c.inc       |  207 ++++---
 tcg/sparc64/tcg-target.c.inc     |  731 ++++++++--------
 tcg/tci/tcg-target.c.inc         |   58 +-
 tcg/meson.build                  |    1 +
 54 files changed, 5988 insertions(+), 3393 deletions(-)
 create mode 100644 include/exec/user/guest-base.h
 create mode 100644 tcg/tcg-op-ldst.c
 create mode 100644 accel/tcg/ldst_atomicity.c.inc
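A note on the central idea of the atomicity work above: a "single-copy atomic" access is one that other threads can never observe half done, which is not the same as an access assembled from smaller atomic pieces. The sketch below is illustrative only (plain GCC/Clang builtins rather than QEMU's qatomic wrappers; the function names are not from this series):

    #include <stdint.h>

    /* One 8-byte access: no tearing is visible at 64-bit granularity. */
    static uint64_t load_8_single_copy(uint64_t *p)
    {
        return __atomic_load_n(p, __ATOMIC_RELAXED);
    }

    /*
     * Two 4-byte accesses: each half is atomic, but a concurrent 8-byte
     * store may be observed half old / half new.
     */
    static uint64_t load_8_by_4(uint32_t *p)
    {
        uint64_t lo = __atomic_load_n(p, __ATOMIC_RELAXED);
        uint64_t hi = __atomic_load_n(p + 1, __ATOMIC_RELAXED);
        return (hi << 32) | lo;   /* assumes a little-endian host */
    }

The series teaches the memory helpers and backends to choose between these strategies based on what the guest instruction actually guarantees.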
The REXW bit must be set to produce a 64-bit pointer result; the
bit is disabled in 32-bit mode, so we can do this unconditionally.

Fixes: 7d9e1ee424b0 ("tcg/i386: Adjust assert in tcg_out_addi_ptr")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1592
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1642
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
 {
     /* This function is only used for passing structs by reference. */
     tcg_debug_assert(imm == (int32_t)imm);
-    tcg_out_modrm_offset(s, OPC_LEA, rd, rs, imm);
+    tcg_out_modrm_offset(s, OPC_LEA | P_REXW, rd, rs, imm);
 }

 static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
--
2.34.1
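For context (not part of the patch): without REX.W, LEA writes only a 32-bit result, so any host pointer above 4 GiB, as is normal for win64 addresses, loses its high half. A minimal standalone illustration with made-up values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t rs = 0x00007ff6dead0000ull;    /* a 64-bit host address */
        int32_t imm = 0x40;

        uint64_t full  = rs + imm;              /* LEA with REX.W: full 64-bit result */
        uint64_t trunc = (uint32_t)(rs + imm);  /* 32-bit LEA: zero-extended low half */

        printf("full=%#llx truncated=%#llx\n",
               (unsigned long long)full, (unsigned long long)trunc);
        return 0;
    }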
This field may be used to describe the precise atomicity requirements
of the guest, which may then be used to constrain the methods by which
it may be emulated by the host.

For instance, the AArch64 LDP (32-bit) instruction changes semantics
with ARMv8.4 LSE2, from

   MO_64 | MO_ATOM_IFALIGN_PAIR
   (64-bits, single-copy atomic only on 4 byte units,
    nonatomic if not aligned by 4),

to

   MO_64 | MO_ATOM_WITHIN16
   (64-bits, single-copy atomic within a 16 byte block)

The former may be implemented with two 4 byte loads, or a single 8 byte
load if that happens to be efficient on the host.  The latter may not
be implemented with two 4 byte loads and may also require a helper when
misaligned.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/memop.h | 37 +++++++++++++++++++++++++++++++++++++
 tcg/tcg.c            | 27 +++++++++++++++++++++------
 2 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/include/exec/memop.h b/include/exec/memop.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/memop.h
+++ b/include/exec/memop.h
@@ -XXX,XX +XXX,XX @@ typedef enum MemOp {
     MO_ALIGN_64 = 6 << MO_ASHIFT,
     MO_ALIGN    = MO_AMASK,

+    /*
+     * MO_ATOM_* describes the atomicity requirements of the operation:
+     * MO_ATOM_IFALIGN: the operation must be single-copy atomic if it
+     *    is aligned; if unaligned there is no atomicity.
+     * MO_ATOM_IFALIGN_PAIR: the entire operation may be considered to
+     *    be a pair of half-sized operations which are packed together
+     *    for convenience, with single-copy atomicity on each half if
+     *    the half is aligned.
+     *    This is the atomicity e.g. of Arm pre-FEAT_LSE2 LDP.
+     * MO_ATOM_WITHIN16: the operation is single-copy atomic, even if it
+     *    is unaligned, so long as it does not cross a 16-byte boundary;
+     *    if it crosses a 16-byte boundary there is no atomicity.
+     *    This is the atomicity e.g. of Arm FEAT_LSE2 LDR.
+     * MO_ATOM_WITHIN16_PAIR: the entire operation is single-copy atomic,
+     *    if it happens to be within a 16-byte boundary, otherwise it
+     *    devolves to a pair of half-sized MO_ATOM_WITHIN16 operations.
+     *    Depending on alignment, one or both will be single-copy atomic.
+     *    This is the atomicity e.g. of Arm FEAT_LSE2 LDP.
+     * MO_ATOM_SUBALIGN: the operation is single-copy atomic by parts
+     *    by the alignment.  E.g. if the address is 0 mod 4, then each
+     *    4-byte subobject is single-copy atomic.
+     *    This is the atomicity e.g. of IBM Power.
+     * MO_ATOM_NONE: the operation has no atomicity requirements.
+     *
+     * Note the default (i.e. 0) value is single-copy atomic to the
+     * size of the operation, if aligned.  This retains the behaviour
+     * from before this field was introduced.
+     */
+    MO_ATOM_SHIFT         = 8,
+    MO_ATOM_IFALIGN       = 0 << MO_ATOM_SHIFT,
+    MO_ATOM_IFALIGN_PAIR  = 1 << MO_ATOM_SHIFT,
+    MO_ATOM_WITHIN16      = 2 << MO_ATOM_SHIFT,
+    MO_ATOM_WITHIN16_PAIR = 3 << MO_ATOM_SHIFT,
+    MO_ATOM_SUBALIGN      = 4 << MO_ATOM_SHIFT,
+    MO_ATOM_NONE          = 5 << MO_ATOM_SHIFT,
+    MO_ATOM_MASK          = 7 << MO_ATOM_SHIFT,
+
     /* Combinations of the above, for ease of use. */
     MO_UB    = MO_8,
     MO_UW    = MO_16,
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
 };

+static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
+    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
+    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
+    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
+    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
+    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
+    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
+};
+
 static const char bswap_flag_name[][6] = {
     [TCG_BSWAP_IZ] = "iz",
     [TCG_BSWAP_OZ] = "oz",
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
         case INDEX_op_qemu_ld_i64:
         case INDEX_op_qemu_st_i64:
             {
+                const char *s_al, *s_op, *s_at;
                 MemOpIdx oi = op->args[k++];
                 MemOp op = get_memop(oi);
                 unsigned ix = get_mmuidx(oi);

-                if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
-                    col += ne_fprintf(f, ",$0x%x,%u", op, ix);
+                s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
+                s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
+                s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
+                op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
+
+                /* If all fields are accounted for, print symbolically. */
+                if (!op && s_al && s_op && s_at) {
+                    col += ne_fprintf(f, ",%s%s%s,%u",
+                                      s_at, s_al, s_op, ix);
                 } else {
-                    const char *s_al, *s_op;
-                    s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
-                    s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
-                    col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
+                    op = get_memop(oi);
+                    col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                 }
                 i = 1;
             }
--
2.34.1
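As a usage sketch of the new field (not from the patch; the helper and its argument are hypothetical), a front end could select the atomicity flag for the LDP case described in the commit message, depending on whether FEAT_LSE2 is present:

    #include <stdbool.h>
    #include "exec/memop.h"   /* within a QEMU build */

    /* Hypothetical helper, for illustration only. */
    static MemOp ldp32_memop(bool have_lse2)
    {
        /* One 64-bit access covering both 32-bit registers of the pair. */
        MemOp mop = MO_64 | MO_LE;

        /*
         * Pre-LSE2: a pair of 4-byte single-copy-atomic halves.
         * With LSE2: single-copy atomic within a 16-byte block.
         */
        mop |= have_lse2 ? MO_ATOM_WITHIN16 : MO_ATOM_IFALIGN_PAIR;
        return mop;
    }

The helpers added later in the series read this field back out of the MemOpIdx to decide whether a plain host load, a pair of smaller atomic loads, or a slow-path helper is required.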
1
Move all of the known-zero optimizations into the per-opcode
1
Create ldst_atomicity.c.inc.
2
functions. Use fold_masks when there is a possibility of the
3
result being determined, and simply set ctx->z_mask otherwise.
4
2
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Not required for user-only code loads, because we've ensured that
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
the page is read-only before beginning to translate code.
5
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
tcg/optimize.c | 545 ++++++++++++++++++++++++++-----------------------
9
accel/tcg/cputlb.c | 175 +++++++---
10
1 file changed, 294 insertions(+), 251 deletions(-)
10
accel/tcg/user-exec.c | 26 +-
11
accel/tcg/ldst_atomicity.c.inc | 566 +++++++++++++++++++++++++++++++++
12
3 files changed, 716 insertions(+), 51 deletions(-)
13
create mode 100644 accel/tcg/ldst_atomicity.c.inc
11
14
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
17
--- a/accel/tcg/cputlb.c
15
+++ b/tcg/optimize.c
18
+++ b/accel/tcg/cputlb.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
19
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
17
TCGTempSet temps_used;
20
return qemu_ram_addr_from_host_nofail(p);
18
19
/* In flight values from optimization. */
20
- uint64_t z_mask;
21
+ uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
22
+ uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
23
TCGType type;
24
} OptContext;
25
26
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
27
return false;
28
}
21
}
29
22
30
+static bool fold_masks(OptContext *ctx, TCGOp *op)
23
+/* Load/store with atomicity primitives. */
31
+{
24
+#include "ldst_atomicity.c.inc"
32
+ uint64_t a_mask = ctx->a_mask;
25
+
33
+ uint64_t z_mask = ctx->z_mask;
26
#ifdef CONFIG_PLUGIN
27
/*
28
* Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
29
@@ -XXX,XX +XXX,XX @@ static void validate_memop(MemOpIdx oi, MemOp expected)
30
* specifically for reading instructions from system memory. It is
31
* called by the translation loop and in some helpers where the code
32
* is disassembled. It shouldn't be called directly by guest code.
33
- */
34
-
35
-typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
36
- MemOpIdx oi, uintptr_t retaddr);
37
-
38
-static inline uint64_t QEMU_ALWAYS_INLINE
39
-load_memop(const void *haddr, MemOp op)
40
-{
41
- switch (op) {
42
- case MO_UB:
43
- return ldub_p(haddr);
44
- case MO_BEUW:
45
- return lduw_be_p(haddr);
46
- case MO_LEUW:
47
- return lduw_le_p(haddr);
48
- case MO_BEUL:
49
- return (uint32_t)ldl_be_p(haddr);
50
- case MO_LEUL:
51
- return (uint32_t)ldl_le_p(haddr);
52
- case MO_BEUQ:
53
- return ldq_be_p(haddr);
54
- case MO_LEUQ:
55
- return ldq_le_p(haddr);
56
- default:
57
- qemu_build_not_reached();
58
- }
59
-}
60
-
61
-/*
62
+ *
63
* For the benefit of TCG generated code, we want to avoid the
64
* complication of ABI-specific return type promotion and always
65
* return a value extended to the register size of the host. This is
66
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
67
return ret_be;
68
}
69
70
+/**
71
+ * do_ld_parts_beN
72
+ * @p: translation parameters
73
+ * @ret_be: accumulated data
74
+ *
75
+ * As do_ld_bytes_beN, but atomically on each aligned part.
76
+ */
77
+static uint64_t do_ld_parts_beN(MMULookupPageData *p, uint64_t ret_be)
78
+{
79
+ void *haddr = p->haddr;
80
+ int size = p->size;
81
+
82
+ do {
83
+ uint64_t x;
84
+ int n;
85
+
86
+ /*
87
+ * Find minimum of alignment and size.
88
+ * This is slightly stronger than required by MO_ATOM_SUBALIGN, which
89
+ * would have only checked the low bits of addr|size once at the start,
90
+ * but is just as easy.
91
+ */
92
+ switch (((uintptr_t)haddr | size) & 7) {
93
+ case 4:
94
+ x = cpu_to_be32(load_atomic4(haddr));
95
+ ret_be = (ret_be << 32) | x;
96
+ n = 4;
97
+ break;
98
+ case 2:
99
+ case 6:
100
+ x = cpu_to_be16(load_atomic2(haddr));
101
+ ret_be = (ret_be << 16) | x;
102
+ n = 2;
103
+ break;
104
+ default:
105
+ x = *(uint8_t *)haddr;
106
+ ret_be = (ret_be << 8) | x;
107
+ n = 1;
108
+ break;
109
+ case 0:
110
+ g_assert_not_reached();
111
+ }
112
+ haddr += n;
113
+ size -= n;
114
+ } while (size != 0);
115
+ return ret_be;
116
+}
117
+
118
+/**
119
+ * do_ld_parts_be4
120
+ * @p: translation parameters
121
+ * @ret_be: accumulated data
122
+ *
123
+ * As do_ld_bytes_beN, but with one atomic load.
124
+ * Four aligned bytes are guaranteed to cover the load.
125
+ */
126
+static uint64_t do_ld_whole_be4(MMULookupPageData *p, uint64_t ret_be)
127
+{
128
+ int o = p->addr & 3;
129
+ uint32_t x = load_atomic4(p->haddr - o);
130
+
131
+ x = cpu_to_be32(x);
132
+ x <<= o * 8;
133
+ x >>= (4 - p->size) * 8;
134
+ return (ret_be << (p->size * 8)) | x;
135
+}
136
+
137
+/**
138
+ * do_ld_parts_be8
139
+ * @p: translation parameters
140
+ * @ret_be: accumulated data
141
+ *
142
+ * As do_ld_bytes_beN, but with one atomic load.
143
+ * Eight aligned bytes are guaranteed to cover the load.
144
+ */
145
+static uint64_t do_ld_whole_be8(CPUArchState *env, uintptr_t ra,
146
+ MMULookupPageData *p, uint64_t ret_be)
147
+{
148
+ int o = p->addr & 7;
149
+ uint64_t x = load_atomic8_or_exit(env, ra, p->haddr - o);
150
+
151
+ x = cpu_to_be64(x);
152
+ x <<= o * 8;
153
+ x >>= (8 - p->size) * 8;
154
+ return (ret_be << (p->size * 8)) | x;
155
+}
156
+
157
/*
158
* Wrapper for the above.
159
*/
160
static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
161
- uint64_t ret_be, int mmu_idx,
162
- MMUAccessType type, uintptr_t ra)
163
+ uint64_t ret_be, int mmu_idx, MMUAccessType type,
164
+ MemOp mop, uintptr_t ra)
165
{
166
+ MemOp atom;
167
+ unsigned tmp, half_size;
168
+
169
if (unlikely(p->flags & TLB_MMIO)) {
170
return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra);
171
- } else {
172
+ }
34
+
173
+
35
+ /*
174
+ /*
36
+ * 32-bit ops generate 32-bit results. For the result is zero test
175
+ * It is a given that we cross a page and therefore there is no
37
+ * below, we can ignore high bits, but for further optimizations we
176
+ * atomicity for the load as a whole, but subobjects may need attention.
38
+ * need to record that the high bits contain garbage.
39
+ */
177
+ */
40
+ if (ctx->type == TCG_TYPE_I32) {
178
+ atom = mop & MO_ATOM_MASK;
41
+ ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
179
+ switch (atom) {
42
+ a_mask &= MAKE_64BIT_MASK(0, 32);
180
+ case MO_ATOM_SUBALIGN:
43
+ z_mask &= MAKE_64BIT_MASK(0, 32);
181
+ return do_ld_parts_beN(p, ret_be);
44
+ }
182
+
45
+
183
+ case MO_ATOM_IFALIGN_PAIR:
46
+ if (z_mask == 0) {
184
+ case MO_ATOM_WITHIN16_PAIR:
47
+ return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
185
+ tmp = mop & MO_SIZE;
48
+ }
186
+ tmp = tmp ? tmp - 1 : 0;
49
+ if (a_mask == 0) {
187
+ half_size = 1 << tmp;
50
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
188
+ if (atom == MO_ATOM_IFALIGN_PAIR
51
+ }
189
+ ? p->size == half_size
52
+ return false;
190
+ : p->size >= half_size) {
53
+}
191
+ if (!HAVE_al8_fast && p->size < 4) {
54
+
192
+ return do_ld_whole_be4(p, ret_be);
55
/*
193
+ } else {
56
* Convert @op to NOT, if NOT is supported by the host.
194
+ return do_ld_whole_be8(env, ra, p, ret_be);
57
* Return true f the conversion is successful, which will still
195
+ }
58
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
196
+ }
59
197
+ /* fall through */
60
static bool fold_and(OptContext *ctx, TCGOp *op)
198
+
61
{
199
+ case MO_ATOM_IFALIGN:
62
+ uint64_t z1, z2;
200
+ case MO_ATOM_WITHIN16:
63
+
201
+ case MO_ATOM_NONE:
64
if (fold_const2(ctx, op) ||
202
return do_ld_bytes_beN(p, ret_be);
65
fold_xi_to_i(ctx, op, 0) ||
203
+
66
fold_xi_to_x(ctx, op, -1) ||
67
fold_xx_to_x(ctx, op)) {
68
return true;
69
}
70
- return false;
71
+
72
+ z1 = arg_info(op->args[1])->z_mask;
73
+ z2 = arg_info(op->args[2])->z_mask;
74
+ ctx->z_mask = z1 & z2;
75
+
76
+ /*
77
+ * Known-zeros does not imply known-ones. Therefore unless
78
+ * arg2 is constant, we can't infer affected bits from it.
79
+ */
80
+ if (arg_is_const(op->args[2])) {
81
+ ctx->a_mask = z1 & ~z2;
82
+ }
83
+
84
+ return fold_masks(ctx, op);
85
}
86
87
static bool fold_andc(OptContext *ctx, TCGOp *op)
88
{
89
+ uint64_t z1;
90
+
91
if (fold_const2(ctx, op) ||
92
fold_xx_to_i(ctx, op, 0) ||
93
fold_xi_to_x(ctx, op, 0) ||
94
fold_ix_to_not(ctx, op, -1)) {
95
return true;
96
}
97
- return false;
98
+
99
+ z1 = arg_info(op->args[1])->z_mask;
100
+
101
+ /*
102
+ * Known-zeros does not imply known-ones. Therefore unless
103
+ * arg2 is constant, we can't infer anything from it.
104
+ */
105
+ if (arg_is_const(op->args[2])) {
106
+ uint64_t z2 = ~arg_info(op->args[2])->z_mask;
107
+ ctx->a_mask = z1 & ~z2;
108
+ z1 &= z2;
109
+ }
110
+ ctx->z_mask = z1;
111
+
112
+ return fold_masks(ctx, op);
113
}
114
115
static bool fold_brcond(OptContext *ctx, TCGOp *op)
116
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
117
118
static bool fold_bswap(OptContext *ctx, TCGOp *op)
119
{
120
+ uint64_t z_mask, sign;
121
+
122
if (arg_is_const(op->args[1])) {
123
uint64_t t = arg_info(op->args[1])->val;
124
125
t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
126
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
127
}
128
- return false;
129
+
130
+ z_mask = arg_info(op->args[1])->z_mask;
131
+ switch (op->opc) {
132
+ case INDEX_op_bswap16_i32:
133
+ case INDEX_op_bswap16_i64:
134
+ z_mask = bswap16(z_mask);
135
+ sign = INT16_MIN;
136
+ break;
137
+ case INDEX_op_bswap32_i32:
138
+ case INDEX_op_bswap32_i64:
139
+ z_mask = bswap32(z_mask);
140
+ sign = INT32_MIN;
141
+ break;
142
+ case INDEX_op_bswap64_i64:
143
+ z_mask = bswap64(z_mask);
144
+ sign = INT64_MIN;
145
+ break;
146
+ default:
204
+ default:
147
+ g_assert_not_reached();
205
+ g_assert_not_reached();
148
+ }
206
}
149
+
207
}
150
+ switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
208
151
+ case TCG_BSWAP_OZ:
209
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
210
}
211
212
/* Perform the load host endian, then swap if necessary. */
213
- ret = load_memop(p->haddr, MO_UW);
214
+ ret = load_atom_2(env, ra, p->haddr, memop);
215
if (memop & MO_BSWAP) {
216
ret = bswap16(ret);
217
}
218
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
219
}
220
221
/* Perform the load host endian. */
222
- ret = load_memop(p->haddr, MO_UL);
223
+ ret = load_atom_4(env, ra, p->haddr, memop);
224
if (memop & MO_BSWAP) {
225
ret = bswap32(ret);
226
}
227
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
228
}
229
230
/* Perform the load host endian. */
231
- ret = load_memop(p->haddr, MO_UQ);
232
+ ret = load_atom_8(env, ra, p->haddr, memop);
233
if (memop & MO_BSWAP) {
234
ret = bswap64(ret);
235
}
236
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
237
return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
238
}
239
240
- ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
241
- ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
242
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
243
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
244
if ((l.memop & MO_BSWAP) == MO_LE) {
245
ret = bswap32(ret);
246
}
247
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
248
return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
249
}
250
251
- ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
252
- ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
253
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
254
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
255
if ((l.memop & MO_BSWAP) == MO_LE) {
256
ret = bswap64(ret);
257
}
258
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
259
index XXXXXXX..XXXXXXX 100644
260
--- a/accel/tcg/user-exec.c
261
+++ b/accel/tcg/user-exec.c
262
@@ -XXX,XX +XXX,XX @@ static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
263
return ret;
264
}
265
266
+#include "ldst_atomicity.c.inc"
267
+
268
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
269
MemOpIdx oi, uintptr_t ra)
270
{
271
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
272
273
validate_memop(oi, MO_BEUW);
274
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
275
- ret = lduw_be_p(haddr);
276
+ ret = load_atom_2(env, ra, haddr, get_memop(oi));
277
clear_helper_retaddr();
278
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
279
- return ret;
280
+ return cpu_to_be16(ret);
281
}
282
283
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
284
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
285
286
validate_memop(oi, MO_BEUL);
287
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
288
- ret = ldl_be_p(haddr);
289
+ ret = load_atom_4(env, ra, haddr, get_memop(oi));
290
clear_helper_retaddr();
291
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
292
- return ret;
293
+ return cpu_to_be32(ret);
294
}
295
296
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
297
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
298
299
validate_memop(oi, MO_BEUQ);
300
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
301
- ret = ldq_be_p(haddr);
302
+ ret = load_atom_8(env, ra, haddr, get_memop(oi));
303
clear_helper_retaddr();
304
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
305
- return ret;
306
+ return cpu_to_be64(ret);
307
}
308
309
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
310
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
311
312
validate_memop(oi, MO_LEUW);
313
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
314
- ret = lduw_le_p(haddr);
315
+ ret = load_atom_2(env, ra, haddr, get_memop(oi));
316
clear_helper_retaddr();
317
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
318
- return ret;
319
+ return cpu_to_le16(ret);
320
}
321
322
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
323
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
324
325
validate_memop(oi, MO_LEUL);
326
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
327
- ret = ldl_le_p(haddr);
328
+ ret = load_atom_4(env, ra, haddr, get_memop(oi));
329
clear_helper_retaddr();
330
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
331
- return ret;
332
+ return cpu_to_le32(ret);
333
}
334
335
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
336
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
337
338
validate_memop(oi, MO_LEUQ);
339
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
340
- ret = ldq_le_p(haddr);
341
+ ret = load_atom_8(env, ra, haddr, get_memop(oi));
342
clear_helper_retaddr();
343
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
344
- return ret;
345
+ return cpu_to_le64(ret);
346
}
347
348
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
349
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
350
new file mode 100644
351
index XXXXXXX..XXXXXXX
352
--- /dev/null
353
+++ b/accel/tcg/ldst_atomicity.c.inc
354
@@ -XXX,XX +XXX,XX @@
355
+/*
356
+ * Routines common to user and system emulation of load/store.
357
+ *
358
+ * Copyright (c) 2022 Linaro, Ltd.
359
+ *
360
+ * SPDX-License-Identifier: GPL-2.0-or-later
361
+ *
362
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
363
+ * See the COPYING file in the top-level directory.
364
+ */
365
+
366
+#ifdef CONFIG_ATOMIC64
367
+# define HAVE_al8 true
368
+#else
369
+# define HAVE_al8 false
370
+#endif
371
+#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
372
+
373
+#if defined(CONFIG_ATOMIC128)
374
+# define HAVE_al16_fast true
375
+#else
376
+# define HAVE_al16_fast false
377
+#endif
378
+
379
+/**
380
+ * required_atomicity:
381
+ *
382
+ * Return the lg2 bytes of atomicity required by @memop for @p.
383
+ * If the operation must be split into two operations to be
384
+ * examined separately for atomicity, return -lg2.
385
+ */
386
+static int required_atomicity(CPUArchState *env, uintptr_t p, MemOp memop)
387
+{
388
+ MemOp atom = memop & MO_ATOM_MASK;
389
+ MemOp size = memop & MO_SIZE;
390
+ MemOp half = size ? size - 1 : 0;
391
+ unsigned tmp;
392
+ int atmax;
393
+
394
+ switch (atom) {
395
+ case MO_ATOM_NONE:
396
+ atmax = MO_8;
152
+ break;
397
+ break;
153
+ case TCG_BSWAP_OS:
398
+
154
+ /* If the sign bit may be 1, force all the bits above to 1. */
399
+ case MO_ATOM_IFALIGN_PAIR:
155
+ if (z_mask & sign) {
400
+ size = half;
156
+ z_mask |= sign;
401
+ /* fall through */
402
+
403
+ case MO_ATOM_IFALIGN:
404
+ tmp = (1 << size) - 1;
405
+ atmax = p & tmp ? MO_8 : size;
406
+ break;
407
+
408
+ case MO_ATOM_WITHIN16:
409
+ tmp = p & 15;
410
+ atmax = (tmp + (1 << size) <= 16 ? size : MO_8);
411
+ break;
412
+
413
+ case MO_ATOM_WITHIN16_PAIR:
414
+ tmp = p & 15;
415
+ if (tmp + (1 << size) <= 16) {
416
+ atmax = size;
417
+ } else if (tmp + (1 << half) == 16) {
418
+ /*
419
+ * The pair exactly straddles the boundary.
420
+ * Both halves are naturally aligned and atomic.
421
+ */
422
+ atmax = half;
423
+ } else {
424
+ /*
425
+ * One of the pair crosses the boundary, and is non-atomic.
426
+ * The other of the pair does not cross, and is atomic.
427
+ */
428
+ atmax = -half;
157
+ }
429
+ }
158
+ break;
430
+ break;
159
+ default:
431
+
160
+ /* The high bits are undefined: force all bits above the sign to 1. */
432
+ case MO_ATOM_SUBALIGN:
161
+ z_mask |= sign << 1;
433
+ /*
434
+ * Examine the alignment of p to determine if there are subobjects
435
+ * that must be aligned. Note that we only really need ctz4() --
436
+ * any more sigificant bits are discarded by the immediately
437
+ * following comparison.
438
+ */
439
+ tmp = ctz32(p);
440
+ atmax = MIN(size, tmp);
162
+ break;
441
+ break;
163
+ }
442
+
164
+ ctx->z_mask = z_mask;
165
+
166
+ return fold_masks(ctx, op);
167
}
168
169
static bool fold_call(OptContext *ctx, TCGOp *op)
170
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
171
172
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
173
{
174
+ uint64_t z_mask;
175
+
176
if (arg_is_const(op->args[1])) {
177
uint64_t t = arg_info(op->args[1])->val;
178
179
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
180
}
181
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
182
}
183
+
184
+ switch (ctx->type) {
185
+ case TCG_TYPE_I32:
186
+ z_mask = 31;
187
+ break;
188
+ case TCG_TYPE_I64:
189
+ z_mask = 63;
190
+ break;
191
+ default:
443
+ default:
192
+ g_assert_not_reached();
444
+ g_assert_not_reached();
193
+ }
445
+ }
194
+ ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
446
+
195
+
447
+ /*
196
return false;
448
+ * Here we have the architectural atomicity of the operation.
197
}
449
+ * However, when executing in a serial context, we need no extra
198
450
+ * host atomicity in order to avoid racing. This reduction
199
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
451
+ * avoids looping with cpu_loop_exit_atomic.
200
{
452
+ */
201
- return fold_const1(ctx, op);
453
+ if (cpu_in_serial_context(env_cpu(env))) {
202
+ if (fold_const1(ctx, op)) {
454
+ return MO_8;
203
+ return true;
455
+ }
204
+ }
456
+ return atmax;
205
+
457
+}
206
+ switch (ctx->type) {
458
+
207
+ case TCG_TYPE_I32:
459
+/**
208
+ ctx->z_mask = 32 | 31;
460
+ * load_atomic2:
209
+ break;
461
+ * @pv: host address
210
+ case TCG_TYPE_I64:
462
+ *
211
+ ctx->z_mask = 64 | 63;
463
+ * Atomically load 2 aligned bytes from @pv.
212
+ break;
464
+ */
465
+static inline uint16_t load_atomic2(void *pv)
466
+{
467
+ uint16_t *p = __builtin_assume_aligned(pv, 2);
468
+ return qatomic_read(p);
469
+}
470
+
471
+/**
472
+ * load_atomic4:
473
+ * @pv: host address
474
+ *
475
+ * Atomically load 4 aligned bytes from @pv.
476
+ */
477
+static inline uint32_t load_atomic4(void *pv)
478
+{
479
+ uint32_t *p = __builtin_assume_aligned(pv, 4);
480
+ return qatomic_read(p);
481
+}
482
+
483
+/**
484
+ * load_atomic8:
485
+ * @pv: host address
486
+ *
487
+ * Atomically load 8 aligned bytes from @pv.
488
+ */
489
+static inline uint64_t load_atomic8(void *pv)
490
+{
491
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
492
+
493
+ qemu_build_assert(HAVE_al8);
494
+ return qatomic_read__nocheck(p);
495
+}
496
+
497
+/**
498
+ * load_atomic16:
499
+ * @pv: host address
500
+ *
501
+ * Atomically load 16 aligned bytes from @pv.
502
+ */
503
+static inline Int128 load_atomic16(void *pv)
504
+{
505
+#ifdef CONFIG_ATOMIC128
506
+ __uint128_t *p = __builtin_assume_aligned(pv, 16);
507
+ Int128Alias r;
508
+
509
+ r.u = qatomic_read__nocheck(p);
510
+ return r.s;
511
+#else
512
+ qemu_build_not_reached();
513
+#endif
514
+}
515
+
516
+/**
517
+ * load_atomic8_or_exit:
518
+ * @env: cpu context
519
+ * @ra: host unwind address
520
+ * @pv: host address
521
+ *
522
+ * Atomically load 8 aligned bytes from @pv.
523
+ * If this is not possible, longjmp out to restart serially.
524
+ */
525
+static uint64_t load_atomic8_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
526
+{
527
+ if (HAVE_al8) {
528
+ return load_atomic8(pv);
529
+ }
530
+
531
+#ifdef CONFIG_USER_ONLY
532
+ /*
533
+ * If the page is not writable, then assume the value is immutable
534
+ * and requires no locking. This ignores the case of MAP_SHARED with
535
+ * another process, because the fallback start_exclusive solution
536
+ * provides no protection across processes.
537
+ */
538
+ if (!page_check_range(h2g(pv), 8, PAGE_WRITE)) {
539
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
540
+ return *p;
541
+ }
542
+#endif
543
+
544
+ /* Ultimate fallback: re-execute in serial context. */
545
+ cpu_loop_exit_atomic(env_cpu(env), ra);
546
+}
547
+
548
+/**
549
+ * load_atomic16_or_exit:
550
+ * @env: cpu context
551
+ * @ra: host unwind address
552
+ * @pv: host address
553
+ *
554
+ * Atomically load 16 aligned bytes from @pv.
555
+ * If this is not possible, longjmp out to restart serially.
556
+ */
557
+static Int128 load_atomic16_or_exit(CPUArchState *env, uintptr_t ra, void *pv)
558
+{
559
+ Int128 *p = __builtin_assume_aligned(pv, 16);
560
+
561
+ if (HAVE_al16_fast) {
562
+ return load_atomic16(p);
563
+ }
564
+
565
+#ifdef CONFIG_USER_ONLY
566
+ /*
567
+ * We can only use cmpxchg to emulate a load if the page is writable.
568
+ * If the page is not writable, then assume the value is immutable
569
+ * and requires no locking. This ignores the case of MAP_SHARED with
570
+ * another process, because the fallback start_exclusive solution
571
+ * provides no protection across processes.
572
+ */
573
+ if (!page_check_range(h2g(p), 16, PAGE_WRITE)) {
574
+ return *p;
575
+ }
576
+#endif
577
+
578
+ /*
579
+ * In system mode all guest pages are writable, and for user-only
580
+ * we have just checked writability. Try cmpxchg.
581
+ */
582
+#if defined(CONFIG_CMPXCHG128)
583
+ /* Swap 0 with 0, with the side-effect of returning the old value. */
584
+ {
585
+ Int128Alias r;
586
+ r.u = __sync_val_compare_and_swap_16((__uint128_t *)p, 0, 0);
587
+ return r.s;
588
+ }
589
+#endif
590
+
591
+ /* Ultimate fallback: re-execute in serial context. */
592
+ cpu_loop_exit_atomic(env_cpu(env), ra);
593
+}
594
+
595
+/**
596
+ * load_atom_extract_al4x2:
597
+ * @pv: host address
598
+ *
599
+ * Load 4 bytes from @p, from two sequential atomic 4-byte loads.
600
+ */
601
+static uint32_t load_atom_extract_al4x2(void *pv)
602
+{
603
+ uintptr_t pi = (uintptr_t)pv;
604
+ int sh = (pi & 3) * 8;
605
+ uint32_t a, b;
606
+
607
+ pv = (void *)(pi & ~3);
608
+ a = load_atomic4(pv);
609
+ b = load_atomic4(pv + 4);
610
+
611
+ if (HOST_BIG_ENDIAN) {
612
+ return (a << sh) | (b >> (-sh & 31));
613
+ } else {
614
+ return (a >> sh) | (b << (-sh & 31));
615
+ }
616
+}
617
+
618
+/**
619
+ * load_atom_extract_al8x2:
620
+ * @pv: host address
621
+ *
622
+ * Load 8 bytes from @p, from two sequential atomic 8-byte loads.
623
+ */
624
+static uint64_t load_atom_extract_al8x2(void *pv)
625
+{
626
+ uintptr_t pi = (uintptr_t)pv;
627
+ int sh = (pi & 7) * 8;
628
+ uint64_t a, b;
629
+
630
+ pv = (void *)(pi & ~7);
631
+ a = load_atomic8(pv);
632
+ b = load_atomic8(pv + 8);
633
+
634
+ if (HOST_BIG_ENDIAN) {
635
+ return (a << sh) | (b >> (-sh & 63));
636
+ } else {
637
+ return (a >> sh) | (b << (-sh & 63));
638
+ }
639
+}
640
+
641
+/**
642
+ * load_atom_extract_al8_or_exit:
643
+ * @env: cpu context
644
+ * @ra: host unwind address
645
+ * @pv: host address
646
+ * @s: object size in bytes, @s <= 4.
647
+ *
648
+ * Atomically load @s bytes from @p, when p % s != 0, and [p, p+s-1] does
649
+ * not cross an 8-byte boundary. This means that we can perform an atomic
650
+ * 8-byte load and extract.
651
+ * The value is returned in the low bits of a uint32_t.
652
+ */
653
+static uint32_t load_atom_extract_al8_or_exit(CPUArchState *env, uintptr_t ra,
654
+ void *pv, int s)
655
+{
656
+ uintptr_t pi = (uintptr_t)pv;
657
+ int o = pi & 7;
658
+ int shr = (HOST_BIG_ENDIAN ? 8 - s - o : o) * 8;
659
+
660
+ pv = (void *)(pi & ~7);
661
+ return load_atomic8_or_exit(env, ra, pv) >> shr;
662
+}
663
+
664
+/**
665
+ * load_atom_extract_al16_or_exit:
666
+ * @env: cpu context
667
+ * @ra: host unwind address
668
+ * @p: host address
669
+ * @s: object size in bytes, @s <= 8.
670
+ *
671
+ * Atomically load @s bytes from @p, when p % 16 < 8
672
+ * and p % 16 + s > 8. I.e. does not cross a 16-byte
673
+ * boundary, but *does* cross an 8-byte boundary.
674
+ * This is the slow version, so we must have eliminated
675
+ * any faster load_atom_extract_al8_or_exit case.
676
+ *
677
+ * If this is not possible, longjmp out to restart serially.
678
+ */
679
+static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
680
+ void *pv, int s)
681
+{
682
+ uintptr_t pi = (uintptr_t)pv;
683
+ int o = pi & 7;
684
+ int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
685
+ Int128 r;
686
+
687
+ /*
688
+ * Note constraints above: p & 8 must be clear.
689
+ * Provoke SIGBUS if possible otherwise.
690
+ */
691
+ pv = (void *)(pi & ~7);
692
+ r = load_atomic16_or_exit(env, ra, pv);
693
+
694
+ r = int128_urshift(r, shr);
695
+ return int128_getlo(r);
696
+}
697
+
698
+/**
699
+ * load_atom_extract_al16_or_al8:
700
+ * @p: host address
701
+ * @s: object size in bytes, @s <= 8.
702
+ *
703
+ * Load @s bytes from @p, when p % s != 0. If [p, p+s-1] does not
704
+ * cross an 16-byte boundary then the access must be 16-byte atomic,
705
+ * otherwise the access must be 8-byte atomic.
706
+ */
707
+static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
708
+{
709
+#if defined(CONFIG_ATOMIC128)
710
+ uintptr_t pi = (uintptr_t)pv;
711
+ int o = pi & 7;
712
+ int shr = (HOST_BIG_ENDIAN ? 16 - s - o : o) * 8;
713
+ __uint128_t r;
714
+
715
+ pv = (void *)(pi & ~7);
716
+ if (pi & 8) {
717
+ uint64_t *p8 = __builtin_assume_aligned(pv, 16, 8);
718
+ uint64_t a = qatomic_read__nocheck(p8);
719
+ uint64_t b = qatomic_read__nocheck(p8 + 1);
720
+
721
+ if (HOST_BIG_ENDIAN) {
722
+ r = ((__uint128_t)a << 64) | b;
723
+ } else {
724
+ r = ((__uint128_t)b << 64) | a;
725
+ }
726
+ } else {
727
+ __uint128_t *p16 = __builtin_assume_aligned(pv, 16, 0);
728
+ r = qatomic_read__nocheck(p16);
729
+ }
730
+ return r >> shr;
731
+#else
732
+ qemu_build_not_reached();
733
+#endif
734
+}
735
+
736
+/**
737
+ * load_atom_4_by_2:
738
+ * @pv: host address
739
+ *
740
+ * Load 4 bytes from @pv, with two 2-byte atomic loads.
741
+ */
742
+static inline uint32_t load_atom_4_by_2(void *pv)
743
+{
744
+ uint32_t a = load_atomic2(pv);
745
+ uint32_t b = load_atomic2(pv + 2);
746
+
747
+ if (HOST_BIG_ENDIAN) {
748
+ return (a << 16) | b;
749
+ } else {
750
+ return (b << 16) | a;
751
+ }
752
+}
753
+
754
+/**
755
+ * load_atom_8_by_2:
756
+ * @pv: host address
757
+ *
758
+ * Load 8 bytes from @pv, with four 2-byte atomic loads.
759
+ */
760
+static inline uint64_t load_atom_8_by_2(void *pv)
761
+{
762
+ uint32_t a = load_atom_4_by_2(pv);
763
+ uint32_t b = load_atom_4_by_2(pv + 4);
764
+
765
+ if (HOST_BIG_ENDIAN) {
766
+ return ((uint64_t)a << 32) | b;
767
+ } else {
768
+ return ((uint64_t)b << 32) | a;
769
+ }
770
+}
771
+
772
+/**
773
+ * load_atom_8_by_4:
774
+ * @pv: host address
775
+ *
776
+ * Load 8 bytes from @pv, with two 4-byte atomic loads.
777
+ */
778
+static inline uint64_t load_atom_8_by_4(void *pv)
779
+{
780
+ uint32_t a = load_atomic4(pv);
781
+ uint32_t b = load_atomic4(pv + 4);
782
+
783
+ if (HOST_BIG_ENDIAN) {
784
+ return ((uint64_t)a << 32) | b;
785
+ } else {
786
+ return ((uint64_t)b << 32) | a;
787
+ }
788
+}
789
+
790
+/**
791
+ * load_atom_2:
792
+ * @p: host address
793
+ * @memop: the full memory op
794
+ *
795
+ * Load 2 bytes from @p, honoring the atomicity of @memop.
796
+ */
797
+static uint16_t load_atom_2(CPUArchState *env, uintptr_t ra,
798
+ void *pv, MemOp memop)
799
+{
800
+ uintptr_t pi = (uintptr_t)pv;
801
+ int atmax;
802
+
803
+ if (likely((pi & 1) == 0)) {
804
+ return load_atomic2(pv);
805
+ }
806
+ if (HAVE_al16_fast) {
807
+ return load_atom_extract_al16_or_al8(pv, 2);
808
+ }
809
+
810
+ atmax = required_atomicity(env, pi, memop);
811
+ switch (atmax) {
812
+ case MO_8:
813
+ return lduw_he_p(pv);
814
+ case MO_16:
815
+ /* The only case remaining is MO_ATOM_WITHIN16. */
816
+ if (!HAVE_al8_fast && (pi & 3) == 1) {
817
+ /* Big or little endian, we want the middle two bytes. */
818
+ return load_atomic4(pv - 1) >> 8;
819
+ }
820
+ if ((pi & 15) != 7) {
821
+ return load_atom_extract_al8_or_exit(env, ra, pv, 2);
822
+ }
823
+ return load_atom_extract_al16_or_exit(env, ra, pv, 2);
213
+ default:
824
+ default:
214
+ g_assert_not_reached();
825
+ g_assert_not_reached();
215
+ }
826
+ }
216
+ return false;
827
+}
217
}
828
+
218
829
+/**
219
static bool fold_deposit(OptContext *ctx, TCGOp *op)
830
+ * load_atom_4:
220
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
831
+ * @p: host address
221
t1 = deposit64(t1, op->args[3], op->args[4], t2);
832
+ * @memop: the full memory op
222
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
833
+ *
223
}
834
+ * Load 4 bytes from @p, honoring the atomicity of @memop.
224
+
835
+ */
225
+ ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
836
+static uint32_t load_atom_4(CPUArchState *env, uintptr_t ra,
226
+ op->args[3], op->args[4],
837
+ void *pv, MemOp memop)
227
+ arg_info(op->args[2])->z_mask);
838
+{
228
return false;
839
+ uintptr_t pi = (uintptr_t)pv;
229
}
840
+ int atmax;
230
841
+
231
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
842
+ if (likely((pi & 3) == 0)) {
232
843
+ return load_atomic4(pv);
233
static bool fold_extract(OptContext *ctx, TCGOp *op)
844
+ }
234
{
845
+ if (HAVE_al16_fast) {
235
+ uint64_t z_mask_old, z_mask;
846
+ return load_atom_extract_al16_or_al8(pv, 4);
236
+
847
+ }
237
if (arg_is_const(op->args[1])) {
848
+
238
uint64_t t;
849
+ atmax = required_atomicity(env, pi, memop);
239
850
+ switch (atmax) {
240
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
851
+ case MO_8:
241
t = extract64(t, op->args[2], op->args[3]);
852
+ case MO_16:
242
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
853
+ case -MO_16:
243
}
854
+ /*
244
- return false;
855
+ * For MO_ATOM_IFALIGN, this is more atomicity than required,
245
+
856
+ * but it's trivially supported on all hosts, better than 4
246
+ z_mask_old = arg_info(op->args[1])->z_mask;
857
+ * individual byte loads (when the host requires alignment),
247
+ z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
858
+ * and overlaps with the MO_ATOM_SUBALIGN case of p % 2 == 0.
248
+ if (op->args[2] == 0) {
859
+ */
249
+ ctx->a_mask = z_mask_old ^ z_mask;
860
+ return load_atom_extract_al4x2(pv);
250
+ }
861
+ case MO_32:
251
+ ctx->z_mask = z_mask;
862
+ if (!(pi & 4)) {
252
+
863
+ return load_atom_extract_al8_or_exit(env, ra, pv, 4);
253
+ return fold_masks(ctx, op);
864
+ }
254
}
865
+ return load_atom_extract_al16_or_exit(env, ra, pv, 4);
255
256
static bool fold_extract2(OptContext *ctx, TCGOp *op)
257
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
258
259
static bool fold_exts(OptContext *ctx, TCGOp *op)
260
{
261
- return fold_const1(ctx, op);
262
+ uint64_t z_mask_old, z_mask, sign;
263
+ bool type_change = false;
264
+
265
+ if (fold_const1(ctx, op)) {
266
+ return true;
267
+ }
268
+
269
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
270
+
271
+ switch (op->opc) {
272
+ CASE_OP_32_64(ext8s):
273
+ sign = INT8_MIN;
274
+ z_mask = (uint8_t)z_mask;
275
+ break;
276
+ CASE_OP_32_64(ext16s):
277
+ sign = INT16_MIN;
278
+ z_mask = (uint16_t)z_mask;
279
+ break;
280
+ case INDEX_op_ext_i32_i64:
281
+ type_change = true;
282
+ QEMU_FALLTHROUGH;
283
+ case INDEX_op_ext32s_i64:
284
+ sign = INT32_MIN;
285
+ z_mask = (uint32_t)z_mask;
286
+ break;
287
+ default:
866
+ default:
288
+ g_assert_not_reached();
867
+ g_assert_not_reached();
289
+ }
868
+ }
290
+
869
+}
291
+ if (z_mask & sign) {
870
+
292
+ z_mask |= sign;
871
+/**
293
+ } else if (!type_change) {
872
+ * load_atom_8:
294
+ ctx->a_mask = z_mask_old ^ z_mask;
873
+ * @p: host address
295
+ }
874
+ * @memop: the full memory op
296
+ ctx->z_mask = z_mask;
875
+ *
297
+
876
+ * Load 8 bytes from @p, honoring the atomicity of @memop.
298
+ return fold_masks(ctx, op);
877
+ */
299
}
878
+static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
300
879
+ void *pv, MemOp memop)
301
static bool fold_extu(OptContext *ctx, TCGOp *op)
880
+{
302
{
881
+ uintptr_t pi = (uintptr_t)pv;
303
- return fold_const1(ctx, op);
882
+ int atmax;
304
+ uint64_t z_mask_old, z_mask;
883
+
305
+ bool type_change = false;
884
+ /*
306
+
885
+ * If the host does not support 8-byte atomics, wait until we have
307
+ if (fold_const1(ctx, op)) {
886
+ * examined the atomicity parameters below.
308
+ return true;
887
+ */
309
+ }
888
+ if (HAVE_al8 && likely((pi & 7) == 0)) {
310
+
889
+ return load_atomic8(pv);
311
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
890
+ }
312
+
891
+ if (HAVE_al16_fast) {
313
+ switch (op->opc) {
892
+ return load_atom_extract_al16_or_al8(pv, 8);
314
+ CASE_OP_32_64(ext8u):
893
+ }
315
+ z_mask = (uint8_t)z_mask;
894
+
316
+ break;
895
+ atmax = required_atomicity(env, pi, memop);
317
+ CASE_OP_32_64(ext16u):
896
+ if (atmax == MO_64) {
318
+ z_mask = (uint16_t)z_mask;
897
+ if (!HAVE_al8 && (pi & 7) == 0) {
319
+ break;
898
+ load_atomic8_or_exit(env, ra, pv);
320
+ case INDEX_op_extrl_i64_i32:
899
+ }
321
+ case INDEX_op_extu_i32_i64:
900
+ return load_atom_extract_al16_or_exit(env, ra, pv, 8);
322
+ type_change = true;
901
+ }
323
+ QEMU_FALLTHROUGH;
902
+ if (HAVE_al8_fast) {
324
+ case INDEX_op_ext32u_i64:
903
+ return load_atom_extract_al8x2(pv);
325
+ z_mask = (uint32_t)z_mask;
904
+ }
326
+ break;
905
+ switch (atmax) {
327
+ case INDEX_op_extrh_i64_i32:
906
+ case MO_8:
328
+ type_change = true;
907
+ return ldq_he_p(pv);
329
+ z_mask >>= 32;
908
+ case MO_16:
330
+ break;
909
+ return load_atom_8_by_2(pv);
910
+ case MO_32:
911
+ return load_atom_8_by_4(pv);
912
+ case -MO_32:
913
+ if (HAVE_al8) {
914
+ return load_atom_extract_al8x2(pv);
915
+ }
916
+ cpu_loop_exit_atomic(env_cpu(env), ra);
331
+ default:
917
+ default:
332
+ g_assert_not_reached();
918
+ g_assert_not_reached();
333
+ }
919
+ }
334
+
920
+}
335
+ ctx->z_mask = z_mask;
336
+ if (!type_change) {
337
+ ctx->a_mask = z_mask_old ^ z_mask;
338
+ }
339
+ return fold_masks(ctx, op);
340
}
341
342
static bool fold_mb(OptContext *ctx, TCGOp *op)
343
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
344
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
345
}
346
347
+ ctx->z_mask = arg_info(op->args[3])->z_mask
348
+ | arg_info(op->args[4])->z_mask;
349
+
350
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
351
uint64_t tv = arg_info(op->args[3])->val;
352
uint64_t fv = arg_info(op->args[4])->val;
353
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
354
355
static bool fold_neg(OptContext *ctx, TCGOp *op)
356
{
357
+ uint64_t z_mask;
358
+
359
if (fold_const1(ctx, op)) {
360
return true;
361
}
362
+
363
+ /* Set to 1 all bits to the left of the rightmost. */
364
+ z_mask = arg_info(op->args[1])->z_mask;
365
+ ctx->z_mask = -(z_mask & -z_mask);
366
+
367
/*
368
* Because of fold_sub_to_neg, we want to always return true,
369
* via finish_folding.
370
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
371
fold_xx_to_x(ctx, op)) {
372
return true;
373
}
374
- return false;
375
+
376
+ ctx->z_mask = arg_info(op->args[1])->z_mask
377
+ | arg_info(op->args[2])->z_mask;
378
+ return fold_masks(ctx, op);
379
}
380
381
static bool fold_orc(OptContext *ctx, TCGOp *op)
382
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
383
384
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
385
{
386
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
387
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
388
+ MemOp mop = get_memop(oi);
389
+ int width = 8 * memop_size(mop);
390
+
391
+ if (!(mop & MO_SIGN) && width < 64) {
392
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
393
+ }
394
+
395
/* Opcodes that touch guest memory stop the mb optimization. */
396
ctx->prev_mb = NULL;
397
return false;
398
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
399
if (i >= 0) {
400
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
401
}
402
+
403
+ ctx->z_mask = 1;
404
return false;
405
}
406
407
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
408
op->opc = INDEX_op_setcond_i32;
409
break;
410
}
411
+
412
+ ctx->z_mask = 1;
413
return false;
414
415
do_setcond_const:
416
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
417
418
static bool fold_sextract(OptContext *ctx, TCGOp *op)
419
{
420
+ int64_t z_mask_old, z_mask;
421
+
422
if (arg_is_const(op->args[1])) {
423
uint64_t t;
424
425
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
426
t = sextract64(t, op->args[2], op->args[3]);
427
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
428
}
429
- return false;
430
+
431
+ z_mask_old = arg_info(op->args[1])->z_mask;
432
+ z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
433
+ if (op->args[2] == 0 && z_mask >= 0) {
434
+ ctx->a_mask = z_mask_old ^ z_mask;
435
+ }
436
+ ctx->z_mask = z_mask;
437
+
438
+ return fold_masks(ctx, op);
439
}
440
441
static bool fold_shift(OptContext *ctx, TCGOp *op)
442
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
443
fold_xi_to_x(ctx, op, 0)) {
444
return true;
445
}
446
+
447
+ if (arg_is_const(op->args[2])) {
448
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type,
449
+ arg_info(op->args[1])->z_mask,
450
+ arg_info(op->args[2])->val);
451
+ return fold_masks(ctx, op);
452
+ }
453
return false;
454
}
455
456
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
457
return fold_addsub2_i32(ctx, op, false);
458
}
459
460
+static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
461
+{
462
+ /* We can't do any folding with a load, but we can record bits. */
463
+ switch (op->opc) {
464
+ CASE_OP_32_64(ld8u):
465
+ ctx->z_mask = MAKE_64BIT_MASK(0, 8);
466
+ break;
467
+ CASE_OP_32_64(ld16u):
468
+ ctx->z_mask = MAKE_64BIT_MASK(0, 16);
469
+ break;
470
+ case INDEX_op_ld32u_i64:
471
+ ctx->z_mask = MAKE_64BIT_MASK(0, 32);
472
+ break;
473
+ default:
474
+ g_assert_not_reached();
475
+ }
476
+ return false;
477
+}
478
+
479
static bool fold_xor(OptContext *ctx, TCGOp *op)
480
{
481
if (fold_const2(ctx, op) ||
482
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
483
fold_xi_to_not(ctx, op, -1)) {
484
return true;
485
}
486
- return false;
487
+
488
+ ctx->z_mask = arg_info(op->args[1])->z_mask
489
+ | arg_info(op->args[2])->z_mask;
490
+ return fold_masks(ctx, op);
491
}
492
493
/* Propagate constants and copies, fold constant expressions. */
494
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
495
}
496
497
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
498
- uint64_t z_mask, partmask, affected, tmp;
499
TCGOpcode opc = op->opc;
500
const TCGOpDef *def;
501
bool done = false;
502
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
503
break;
504
}
505
506
- /* Simplify using known-zero bits. Currently only ops with a single
507
- output argument is supported. */
508
- z_mask = -1;
509
- affected = -1;
510
- switch (opc) {
511
- CASE_OP_32_64(ext8s):
512
- if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
513
- break;
514
- }
515
- QEMU_FALLTHROUGH;
516
- CASE_OP_32_64(ext8u):
517
- z_mask = 0xff;
518
- goto and_const;
519
- CASE_OP_32_64(ext16s):
520
- if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
521
- break;
522
- }
523
- QEMU_FALLTHROUGH;
524
- CASE_OP_32_64(ext16u):
525
- z_mask = 0xffff;
526
- goto and_const;
527
- case INDEX_op_ext32s_i64:
528
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
529
- break;
530
- }
531
- QEMU_FALLTHROUGH;
532
- case INDEX_op_ext32u_i64:
533
- z_mask = 0xffffffffU;
534
- goto and_const;
535
-
536
- CASE_OP_32_64(and):
537
- z_mask = arg_info(op->args[2])->z_mask;
538
- if (arg_is_const(op->args[2])) {
539
- and_const:
540
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
541
- }
542
- z_mask = arg_info(op->args[1])->z_mask & z_mask;
543
- break;
544
-
545
- case INDEX_op_ext_i32_i64:
546
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
547
- break;
548
- }
549
- QEMU_FALLTHROUGH;
550
- case INDEX_op_extu_i32_i64:
551
- /* We do not compute affected as it is a size changing op. */
552
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
553
- break;
554
-
555
- CASE_OP_32_64(andc):
556
- /* Known-zeros does not imply known-ones. Therefore unless
557
- op->args[2] is constant, we can't infer anything from it. */
558
- if (arg_is_const(op->args[2])) {
559
- z_mask = ~arg_info(op->args[2])->z_mask;
560
- goto and_const;
561
- }
562
- /* But we certainly know nothing outside args[1] may be set. */
563
- z_mask = arg_info(op->args[1])->z_mask;
564
- break;
565
-
566
- case INDEX_op_sar_i32:
567
- if (arg_is_const(op->args[2])) {
568
- tmp = arg_info(op->args[2])->val & 31;
569
- z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
570
- }
571
- break;
572
- case INDEX_op_sar_i64:
573
- if (arg_is_const(op->args[2])) {
574
- tmp = arg_info(op->args[2])->val & 63;
575
- z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
576
- }
577
- break;
578
-
579
- case INDEX_op_shr_i32:
580
- if (arg_is_const(op->args[2])) {
581
- tmp = arg_info(op->args[2])->val & 31;
582
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
583
- }
584
- break;
585
- case INDEX_op_shr_i64:
586
- if (arg_is_const(op->args[2])) {
587
- tmp = arg_info(op->args[2])->val & 63;
588
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
589
- }
590
- break;
591
-
592
- case INDEX_op_extrl_i64_i32:
593
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
594
- break;
595
- case INDEX_op_extrh_i64_i32:
596
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
597
- break;
598
-
599
- CASE_OP_32_64(shl):
600
- if (arg_is_const(op->args[2])) {
601
- tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
602
- z_mask = arg_info(op->args[1])->z_mask << tmp;
603
- }
604
- break;
605
-
606
- CASE_OP_32_64(neg):
607
- /* Set to 1 all bits to the left of the rightmost. */
608
- z_mask = -(arg_info(op->args[1])->z_mask
609
- & -arg_info(op->args[1])->z_mask);
610
- break;
611
-
612
- CASE_OP_32_64(deposit):
613
- z_mask = deposit64(arg_info(op->args[1])->z_mask,
614
- op->args[3], op->args[4],
615
- arg_info(op->args[2])->z_mask);
616
- break;
617
-
618
- CASE_OP_32_64(extract):
619
- z_mask = extract64(arg_info(op->args[1])->z_mask,
620
- op->args[2], op->args[3]);
621
- if (op->args[2] == 0) {
622
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
623
- }
624
- break;
625
- CASE_OP_32_64(sextract):
626
- z_mask = sextract64(arg_info(op->args[1])->z_mask,
627
- op->args[2], op->args[3]);
628
- if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
629
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
630
- }
631
- break;
632
-
633
- CASE_OP_32_64(or):
634
- CASE_OP_32_64(xor):
635
- z_mask = arg_info(op->args[1])->z_mask
636
- | arg_info(op->args[2])->z_mask;
637
- break;
638
-
639
- case INDEX_op_clz_i32:
640
- case INDEX_op_ctz_i32:
641
- z_mask = arg_info(op->args[2])->z_mask | 31;
642
- break;
643
-
644
- case INDEX_op_clz_i64:
645
- case INDEX_op_ctz_i64:
646
- z_mask = arg_info(op->args[2])->z_mask | 63;
647
- break;
648
-
649
- case INDEX_op_ctpop_i32:
650
- z_mask = 32 | 31;
651
- break;
652
- case INDEX_op_ctpop_i64:
653
- z_mask = 64 | 63;
654
- break;
655
-
656
- CASE_OP_32_64(setcond):
657
- case INDEX_op_setcond2_i32:
658
- z_mask = 1;
659
- break;
660
-
661
- CASE_OP_32_64(movcond):
662
- z_mask = arg_info(op->args[3])->z_mask
663
- | arg_info(op->args[4])->z_mask;
664
- break;
665
-
666
- CASE_OP_32_64(ld8u):
667
- z_mask = 0xff;
668
- break;
669
- CASE_OP_32_64(ld16u):
670
- z_mask = 0xffff;
671
- break;
672
- case INDEX_op_ld32u_i64:
673
- z_mask = 0xffffffffu;
674
- break;
675
-
676
- CASE_OP_32_64(qemu_ld):
677
- {
678
- MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
679
- MemOp mop = get_memop(oi);
680
- if (!(mop & MO_SIGN)) {
681
- z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
682
- }
683
- }
684
- break;
685
-
686
- CASE_OP_32_64(bswap16):
687
- z_mask = arg_info(op->args[1])->z_mask;
688
- if (z_mask <= 0xffff) {
689
- op->args[2] |= TCG_BSWAP_IZ;
690
- }
691
- z_mask = bswap16(z_mask);
692
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
693
- case TCG_BSWAP_OZ:
694
- break;
695
- case TCG_BSWAP_OS:
696
- z_mask = (int16_t)z_mask;
697
- break;
698
- default: /* undefined high bits */
699
- z_mask |= MAKE_64BIT_MASK(16, 48);
700
- break;
701
- }
702
- break;
703
-
704
- case INDEX_op_bswap32_i64:
705
- z_mask = arg_info(op->args[1])->z_mask;
706
- if (z_mask <= 0xffffffffu) {
707
- op->args[2] |= TCG_BSWAP_IZ;
708
- }
709
- z_mask = bswap32(z_mask);
710
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
711
- case TCG_BSWAP_OZ:
712
- break;
713
- case TCG_BSWAP_OS:
714
- z_mask = (int32_t)z_mask;
715
- break;
716
- default: /* undefined high bits */
717
- z_mask |= MAKE_64BIT_MASK(32, 32);
718
- break;
719
- }
720
- break;
721
-
722
- default:
723
- break;
724
- }
725
-
726
-        /* Simplify using known-zero bits. Currently only ops with a single
-           output argument is supported. */
-        /* 32-bit ops generate 32-bit results.  For the result is zero test
-           below, we can ignore high bits, but for further optimizations we
-           need to record that the high bits contain garbage.  */
-        partmask = z_mask;
-        if (ctx.type == TCG_TYPE_I32) {
-            z_mask |= ~(tcg_target_ulong)0xffffffffu;
-            partmask &= 0xffffffffu;
-            affected &= 0xffffffffu;
-        }
-        ctx.z_mask = z_mask;
-
-        if (partmask == 0) {
-            tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
-            continue;
-        }
-        if (affected == 0) {
-            tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
-            continue;
-        }
+        /* Assume all bits affected, and no bits known zero. */
+        ctx.a_mask = -1;
+        ctx.z_mask = -1;

/*
750
* Process each opcode.
751
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
752
case INDEX_op_extrh_i64_i32:
753
done = fold_extu(&ctx, op);
754
break;
755
+        CASE_OP_32_64(ld8u):
+        CASE_OP_32_64(ld16u):
+        case INDEX_op_ld32u_i64:
+            done = fold_tcg_ld(&ctx, op);
+            break;
         case INDEX_op_mb:
             done = fold_mb(&ctx, op);
             break;
--
2.25.1

--
2.34.1
From: Luis Pires <luis.pires@eldorado.org.br>

Move udiv_qrnnd() from include/fpu/softfloat-macros.h to host-utils,
so it can be reused by divu128().

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-3-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/fpu/softfloat-macros.h | 82 ----------------------------------
 include/qemu/host-utils.h      | 81 +++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 82 deletions(-)

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c             | 108 ++++----
 accel/tcg/user-exec.c          |  12 +-
 accel/tcg/ldst_atomicity.c.inc | 491 +++++++++++++++++++++++++++++++++
 3 files changed, 545 insertions(+), 66 deletions(-)
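A minimal, illustrative model of the contract divu128() relies on, not part
of the patch: udiv_qrnnd() divides the 128-bit value (n1:n0) by d, assuming
n1 < d so the quotient fits in 64 bits, returning the quotient and writing
the remainder through *r. The sketch leans on the compiler's __int128
support (GCC/Clang on 64-bit hosts); the helper being moved uses
host-specific assembly with a portable long-division fallback.

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Reference model of the udiv_qrnnd() contract, using __int128. */
static uint64_t ref_udiv_qrnnd(uint64_t *r, uint64_t n1, uint64_t n0, uint64_t d)
{
    unsigned __int128 n = ((unsigned __int128)n1 << 64) | n0;

    assert(n1 < d);          /* precondition: quotient fits in 64 bits */
    *r = (uint64_t)(n % d);
    return (uint64_t)(n / d);
}

int main(void)
{
    uint64_t rem;
    uint64_t quot = ref_udiv_qrnnd(&rem, 0x1234, 0x56789abcdef01234ull,
                                   0xfedcba9876543210ull);

    printf("q=0x%016" PRIx64 " r=0x%016" PRIx64 "\n", quot, rem);
    return 0;
}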
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
9
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
16
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
17
--- a/include/fpu/softfloat-macros.h
11
--- a/accel/tcg/cputlb.c
18
+++ b/include/fpu/softfloat-macros.h
12
+++ b/accel/tcg/cputlb.c
19
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
20
* so some portions are provided under:
14
* Store Helpers
21
* the SoftFloat-2a license
22
* the BSD license
23
- * GPL-v2-or-later
24
*
25
* Any future contributions to this file after December 1st 2014 will be
26
* taken to be licensed under the Softfloat-2a license unless specifically
27
@@ -XXX,XX +XXX,XX @@ this code that are retained.
28
* THE POSSIBILITY OF SUCH DAMAGE.
29
*/
15
*/
30
16
31
-/* Portions of this work are licensed under the terms of the GNU GPL,
17
-static inline void QEMU_ALWAYS_INLINE
32
- * version 2 or later. See the COPYING file in the top-level directory.
18
-store_memop(void *haddr, uint64_t val, MemOp op)
33
- */
34
-
35
#ifndef FPU_SOFTFLOAT_MACROS_H
36
#define FPU_SOFTFLOAT_MACROS_H
37
38
@@ -XXX,XX +XXX,XX @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
39
40
}
41
42
-/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
43
- * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
44
- *
45
- * Licensed under the GPLv2/LGPLv3
46
- */
47
-static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
48
- uint64_t n0, uint64_t d)
49
-{
19
-{
50
-#if defined(__x86_64__)
20
- switch (op) {
51
- uint64_t q;
21
- case MO_UB:
52
- asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
22
- stb_p(haddr, val);
53
- return q;
23
- break;
54
-#elif defined(__s390x__) && !defined(__clang__)
24
- case MO_BEUW:
55
- /* Need to use a TImode type to get an even register pair for DLGR. */
25
- stw_be_p(haddr, val);
56
- unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
26
- break;
57
- asm("dlgr %0, %1" : "+r"(n) : "r"(d));
27
- case MO_LEUW:
58
- *r = n >> 64;
28
- stw_le_p(haddr, val);
59
- return n;
29
- break;
60
-#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
30
- case MO_BEUL:
61
- /* From Power ISA 2.06, programming note for divdeu. */
31
- stl_be_p(haddr, val);
62
- uint64_t q1, q2, Q, r1, r2, R;
32
- break;
63
- asm("divdeu %0,%2,%4; divdu %1,%3,%4"
33
- case MO_LEUL:
64
- : "=&r"(q1), "=r"(q2)
34
- stl_le_p(haddr, val);
65
- : "r"(n1), "r"(n0), "r"(d));
35
- break;
66
- r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
36
- case MO_BEUQ:
67
- r2 = n0 - (q2 * d);
37
- stq_be_p(haddr, val);
68
- Q = q1 + q2;
38
- break;
69
- R = r1 + r2;
39
- case MO_LEUQ:
70
- if (R >= d || R < r2) { /* overflow implies R > d */
40
- stq_le_p(haddr, val);
71
- Q += 1;
41
- break;
72
- R -= d;
42
- default:
43
- qemu_build_not_reached();
73
- }
44
- }
74
- *r = R;
75
- return Q;
76
-#else
77
- uint64_t d0, d1, q0, q1, r1, r0, m;
78
-
79
- d0 = (uint32_t)d;
80
- d1 = d >> 32;
81
-
82
- r1 = n1 % d1;
83
- q1 = n1 / d1;
84
- m = q1 * d0;
85
- r1 = (r1 << 32) | (n0 >> 32);
86
- if (r1 < m) {
87
- q1 -= 1;
88
- r1 += d;
89
- if (r1 >= d) {
90
- if (r1 < m) {
91
- q1 -= 1;
92
- r1 += d;
93
- }
94
- }
95
- }
96
- r1 -= m;
97
-
98
- r0 = r1 % d1;
99
- q0 = r1 / d1;
100
- m = q0 * d0;
101
- r0 = (r0 << 32) | (uint32_t)n0;
102
- if (r0 < m) {
103
- q0 -= 1;
104
- r0 += d;
105
- if (r0 >= d) {
106
- if (r0 < m) {
107
- q0 -= 1;
108
- r0 += d;
109
- }
110
- }
111
- }
112
- r0 -= m;
113
-
114
- *r = r0;
115
- return (q1 << 32) | q0;
116
-#endif
117
-}
45
-}
118
-
46
-
119
/*----------------------------------------------------------------------------
47
/**
120
| Returns an approximation to the square root of the 32-bit significand given
48
* do_st_mmio_leN:
121
| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
49
* @env: cpu context
122
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
50
@@ -XXX,XX +XXX,XX @@ static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
123
index XXXXXXX..XXXXXXX 100644
51
return val_le;
124
--- a/include/qemu/host-utils.h
52
}
125
+++ b/include/qemu/host-utils.h
53
126
@@ -XXX,XX +XXX,XX @@
54
-/**
127
* THE SOFTWARE.
55
- * do_st_bytes_leN:
56
- * @p: translation parameters
57
- * @val_le: data to store
58
- *
59
- * Store @p->size bytes at @p->haddr, which is RAM.
60
- * The bytes to store are extracted in little-endian order from @val_le;
61
- * return the bytes of @val_le beyond @p->size that have not been stored.
62
- */
63
-static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le)
64
-{
65
- uint8_t *haddr = p->haddr;
66
- int i, size = p->size;
67
-
68
- for (i = 0; i < size; i++, val_le >>= 8) {
69
- haddr[i] = val_le;
70
- }
71
- return val_le;
72
-}
73
-
74
/*
75
* Wrapper for the above.
128
*/
76
*/
129
77
static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
130
+/* Portions of this work are licensed under the terms of the GNU GPL,
78
- uint64_t val_le, int mmu_idx, uintptr_t ra)
131
+ * version 2 or later. See the COPYING file in the top-level directory.
79
+ uint64_t val_le, int mmu_idx,
132
+ */
80
+ MemOp mop, uintptr_t ra)
133
+
81
{
134
#ifndef HOST_UTILS_H
82
+ MemOp atom;
135
#define HOST_UTILS_H
83
+ unsigned tmp, half_size;
136
84
+
137
@@ -XXX,XX +XXX,XX @@ void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
85
if (unlikely(p->flags & TLB_MMIO)) {
138
*/
86
return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
139
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
87
} else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
140
88
return val_le >> (p->size * 8);
141
+/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
89
- } else {
142
+ * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
90
- return do_st_bytes_leN(p, val_le);
143
+ *
91
+ }
144
+ * Licensed under the GPLv2/LGPLv3
92
+
145
+ */
93
+ /*
146
+static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
94
+ * It is a given that we cross a page and therefore there is no atomicity
147
+ uint64_t n0, uint64_t d)
95
+ * for the store as a whole, but subobjects may need attention.
148
+{
96
+ */
149
+#if defined(__x86_64__)
97
+ atom = mop & MO_ATOM_MASK;
150
+ uint64_t q;
98
+ switch (atom) {
151
+ asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
99
+ case MO_ATOM_SUBALIGN:
152
+ return q;
100
+ return store_parts_leN(p->haddr, p->size, val_le);
153
+#elif defined(__s390x__) && !defined(__clang__)
101
+
154
+ /* Need to use a TImode type to get an even register pair for DLGR. */
102
+ case MO_ATOM_IFALIGN_PAIR:
155
+ unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
103
+ case MO_ATOM_WITHIN16_PAIR:
156
+ asm("dlgr %0, %1" : "+r"(n) : "r"(d));
104
+ tmp = mop & MO_SIZE;
157
+ *r = n >> 64;
105
+ tmp = tmp ? tmp - 1 : 0;
158
+ return n;
106
+ half_size = 1 << tmp;
159
+#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
107
+ if (atom == MO_ATOM_IFALIGN_PAIR
160
+ /* From Power ISA 2.06, programming note for divdeu. */
108
+ ? p->size == half_size
161
+ uint64_t q1, q2, Q, r1, r2, R;
109
+ : p->size >= half_size) {
162
+ asm("divdeu %0,%2,%4; divdu %1,%3,%4"
110
+ if (!HAVE_al8_fast && p->size <= 4) {
163
+ : "=&r"(q1), "=r"(q2)
111
+ return store_whole_le4(p->haddr, p->size, val_le);
164
+ : "r"(n1), "r"(n0), "r"(d));
112
+ } else if (HAVE_al8) {
165
+ r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
113
+ return store_whole_le8(p->haddr, p->size, val_le);
166
+ r2 = n0 - (q2 * d);
114
+ } else {
167
+ Q = q1 + q2;
115
+ cpu_loop_exit_atomic(env_cpu(env), ra);
168
+ R = r1 + r2;
169
+ if (R >= d || R < r2) { /* overflow implies R > d */
170
+ Q += 1;
171
+ R -= d;
172
+ }
173
+ *r = R;
174
+ return Q;
175
+#else
176
+ uint64_t d0, d1, q0, q1, r1, r0, m;
177
+
178
+ d0 = (uint32_t)d;
179
+ d1 = d >> 32;
180
+
181
+ r1 = n1 % d1;
182
+ q1 = n1 / d1;
183
+ m = q1 * d0;
184
+ r1 = (r1 << 32) | (n0 >> 32);
185
+ if (r1 < m) {
186
+ q1 -= 1;
187
+ r1 += d;
188
+ if (r1 >= d) {
189
+ if (r1 < m) {
190
+ q1 -= 1;
191
+ r1 += d;
192
+ }
116
+ }
193
+ }
117
+ }
194
+ }
118
+ /* fall through */
195
+ r1 -= m;
119
+
196
+
120
+ case MO_ATOM_IFALIGN:
197
+ r0 = r1 % d1;
121
+ case MO_ATOM_WITHIN16:
198
+ q0 = r1 / d1;
122
+ case MO_ATOM_NONE:
199
+ m = q0 * d0;
123
+ return store_bytes_leN(p->haddr, p->size, val_le);
200
+ r0 = (r0 << 32) | (uint32_t)n0;
124
+
201
+ if (r0 < m) {
125
+ default:
202
+ q0 -= 1;
126
+ g_assert_not_reached();
203
+ r0 += d;
127
}
204
+ if (r0 >= d) {
128
}
205
+ if (r0 < m) {
129
206
+ q0 -= 1;
130
@@ -XXX,XX +XXX,XX @@ static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
207
+ r0 += d;
131
if (memop & MO_BSWAP) {
132
val = bswap16(val);
133
}
134
- store_memop(p->haddr, val, MO_UW);
135
+ store_atom_2(env, ra, p->haddr, memop, val);
136
}
137
}
138
139
@@ -XXX,XX +XXX,XX @@ static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
140
if (memop & MO_BSWAP) {
141
val = bswap32(val);
142
}
143
- store_memop(p->haddr, val, MO_UL);
144
+ store_atom_4(env, ra, p->haddr, memop, val);
145
}
146
}
147
148
@@ -XXX,XX +XXX,XX @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
149
if (memop & MO_BSWAP) {
150
val = bswap64(val);
151
}
152
- store_memop(p->haddr, val, MO_UQ);
153
+ store_atom_8(env, ra, p->haddr, memop, val);
154
}
155
}
156
157
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
158
if ((l.memop & MO_BSWAP) != MO_LE) {
159
val = bswap32(val);
160
}
161
- val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
162
- (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
163
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
164
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
165
}
166
167
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
168
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
169
if ((l.memop & MO_BSWAP) != MO_LE) {
170
val = bswap64(val);
171
}
172
- val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
173
- (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
174
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
175
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
176
}
177
178
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
179
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
180
index XXXXXXX..XXXXXXX 100644
181
--- a/accel/tcg/user-exec.c
182
+++ b/accel/tcg/user-exec.c
183
@@ -XXX,XX +XXX,XX @@ void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
184
185
validate_memop(oi, MO_BEUW);
186
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
187
- stw_be_p(haddr, val);
188
+ store_atom_2(env, ra, haddr, get_memop(oi), be16_to_cpu(val));
189
clear_helper_retaddr();
190
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
191
}
192
@@ -XXX,XX +XXX,XX @@ void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
193
194
validate_memop(oi, MO_BEUL);
195
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
196
- stl_be_p(haddr, val);
197
+ store_atom_4(env, ra, haddr, get_memop(oi), be32_to_cpu(val));
198
clear_helper_retaddr();
199
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
200
}
201
@@ -XXX,XX +XXX,XX @@ void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
202
203
validate_memop(oi, MO_BEUQ);
204
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
205
- stq_be_p(haddr, val);
206
+ store_atom_8(env, ra, haddr, get_memop(oi), be64_to_cpu(val));
207
clear_helper_retaddr();
208
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
209
}
210
@@ -XXX,XX +XXX,XX @@ void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
211
212
validate_memop(oi, MO_LEUW);
213
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
214
- stw_le_p(haddr, val);
215
+ store_atom_2(env, ra, haddr, get_memop(oi), le16_to_cpu(val));
216
clear_helper_retaddr();
217
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
218
}
219
@@ -XXX,XX +XXX,XX @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
220
221
validate_memop(oi, MO_LEUL);
222
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
223
- stl_le_p(haddr, val);
224
+ store_atom_4(env, ra, haddr, get_memop(oi), le32_to_cpu(val));
225
clear_helper_retaddr();
226
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
227
}
228
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
229
230
validate_memop(oi, MO_LEUQ);
231
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
232
- stq_le_p(haddr, val);
233
+ store_atom_8(env, ra, haddr, get_memop(oi), le64_to_cpu(val));
234
clear_helper_retaddr();
235
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
236
}
237
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
238
index XXXXXXX..XXXXXXX 100644
239
--- a/accel/tcg/ldst_atomicity.c.inc
240
+++ b/accel/tcg/ldst_atomicity.c.inc
241
@@ -XXX,XX +XXX,XX @@
242
#else
243
# define HAVE_al16_fast false
244
#endif
245
+#if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
246
+# define HAVE_al16 true
247
+#else
248
+# define HAVE_al16 false
249
+#endif
250
+
251
252
/**
253
* required_atomicity:
254
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
255
g_assert_not_reached();
256
}
257
}
258
+
259
+/**
260
+ * store_atomic2:
261
+ * @pv: host address
262
+ * @val: value to store
263
+ *
264
+ * Atomically store 2 aligned bytes to @pv.
265
+ */
266
+static inline void store_atomic2(void *pv, uint16_t val)
267
+{
268
+ uint16_t *p = __builtin_assume_aligned(pv, 2);
269
+ qatomic_set(p, val);
270
+}
271
+
272
+/**
273
+ * store_atomic4:
274
+ * @pv: host address
275
+ * @val: value to store
276
+ *
277
+ * Atomically store 4 aligned bytes to @pv.
278
+ */
279
+static inline void store_atomic4(void *pv, uint32_t val)
280
+{
281
+ uint32_t *p = __builtin_assume_aligned(pv, 4);
282
+ qatomic_set(p, val);
283
+}
284
+
285
+/**
286
+ * store_atomic8:
287
+ * @pv: host address
288
+ * @val: value to store
289
+ *
290
+ * Atomically store 8 aligned bytes to @pv.
291
+ */
292
+static inline void store_atomic8(void *pv, uint64_t val)
293
+{
294
+ uint64_t *p = __builtin_assume_aligned(pv, 8);
295
+
296
+ qemu_build_assert(HAVE_al8);
297
+ qatomic_set__nocheck(p, val);
298
+}
299
+
300
+/**
301
+ * store_atom_4x2
302
+ */
303
+static inline void store_atom_4_by_2(void *pv, uint32_t val)
304
+{
305
+ store_atomic2(pv, val >> (HOST_BIG_ENDIAN ? 16 : 0));
306
+ store_atomic2(pv + 2, val >> (HOST_BIG_ENDIAN ? 0 : 16));
307
+}
308
+
309
+/**
310
+ * store_atom_8_by_2
311
+ */
312
+static inline void store_atom_8_by_2(void *pv, uint64_t val)
313
+{
314
+ store_atom_4_by_2(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
315
+ store_atom_4_by_2(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
316
+}
317
+
318
+/**
319
+ * store_atom_8_by_4
320
+ */
321
+static inline void store_atom_8_by_4(void *pv, uint64_t val)
322
+{
323
+ store_atomic4(pv, val >> (HOST_BIG_ENDIAN ? 32 : 0));
324
+ store_atomic4(pv + 4, val >> (HOST_BIG_ENDIAN ? 0 : 32));
325
+}
326
+
327
+/**
328
+ * store_atom_insert_al4:
329
+ * @p: host address
330
+ * @val: shifted value to store
331
+ * @msk: mask for value to store
332
+ *
333
+ * Atomically store @val to @p, masked by @msk.
334
+ */
335
+static void store_atom_insert_al4(uint32_t *p, uint32_t val, uint32_t msk)
336
+{
337
+ uint32_t old, new;
338
+
339
+ p = __builtin_assume_aligned(p, 4);
340
+ old = qatomic_read(p);
341
+ do {
342
+ new = (old & ~msk) | val;
343
+ } while (!__atomic_compare_exchange_n(p, &old, new, true,
344
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
345
+}
346
+
347
+/**
348
+ * store_atom_insert_al8:
349
+ * @p: host address
350
+ * @val: shifted value to store
351
+ * @msk: mask for value to store
352
+ *
353
+ * Atomically store @val to @p masked by @msk.
354
+ */
355
+static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
356
+{
357
+ uint64_t old, new;
358
+
359
+ qemu_build_assert(HAVE_al8);
360
+ p = __builtin_assume_aligned(p, 8);
361
+ old = qatomic_read__nocheck(p);
362
+ do {
363
+ new = (old & ~msk) | val;
364
+ } while (!__atomic_compare_exchange_n(p, &old, new, true,
365
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
366
+}
367
+
368
+/**
369
+ * store_atom_insert_al16:
370
+ * @p: host address
371
+ * @val: shifted value to store
372
+ * @msk: mask for value to store
373
+ *
374
+ * Atomically store @val to @p masked by @msk.
375
+ */
376
+static void store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
377
+{
378
+#if defined(CONFIG_ATOMIC128)
379
+ __uint128_t *pu, old, new;
380
+
381
+ /* With CONFIG_ATOMIC128, we can avoid the memory barriers. */
382
+ pu = __builtin_assume_aligned(ps, 16);
383
+ old = *pu;
384
+ do {
385
+ new = (old & ~msk.u) | val.u;
386
+ } while (!__atomic_compare_exchange_n(pu, &old, new, true,
387
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED));
388
+#elif defined(CONFIG_CMPXCHG128)
389
+ __uint128_t *pu, old, new;
390
+
391
+ /*
392
+ * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
393
+ * defer to libatomic, so we must use __sync_*_compare_and_swap_16
394
+ * and accept the sequential consistency that comes with it.
395
+ */
396
+ pu = __builtin_assume_aligned(ps, 16);
397
+ do {
398
+ old = *pu;
399
+ new = (old & ~msk.u) | val.u;
400
+ } while (!__sync_bool_compare_and_swap_16(pu, old, new));
401
+#else
402
+ qemu_build_not_reached();
403
+#endif
404
+}
405
+
406
+/**
407
+ * store_bytes_leN:
408
+ * @pv: host address
409
+ * @size: number of bytes to store
410
+ * @val_le: data to store
411
+ *
412
+ * Store @size bytes at @p. The bytes to store are extracted in little-endian order
413
+ * from @val_le; return the bytes of @val_le beyond @size that have not been stored.
414
+ */
415
+static uint64_t store_bytes_leN(void *pv, int size, uint64_t val_le)
416
+{
417
+ uint8_t *p = pv;
418
+ for (int i = 0; i < size; i++, val_le >>= 8) {
419
+ p[i] = val_le;
420
+ }
421
+ return val_le;
422
+}
423
+
424
+/**
425
+ * store_parts_leN
426
+ * @pv: host address
427
+ * @size: number of bytes to store
428
+ * @val_le: data to store
429
+ *
430
+ * As store_bytes_leN, but atomically on each aligned part.
431
+ */
432
+G_GNUC_UNUSED
433
+static uint64_t store_parts_leN(void *pv, int size, uint64_t val_le)
434
+{
435
+ do {
436
+ int n;
437
+
438
+ /* Find minimum of alignment and size */
439
+ switch (((uintptr_t)pv | size) & 7) {
440
+ case 4:
441
+ store_atomic4(pv, le32_to_cpu(val_le));
442
+ val_le >>= 32;
443
+ n = 4;
444
+ break;
445
+ case 2:
446
+ case 6:
447
+ store_atomic2(pv, le16_to_cpu(val_le));
448
+ val_le >>= 16;
449
+ n = 2;
450
+ break;
451
+ default:
452
+ *(uint8_t *)pv = val_le;
453
+ val_le >>= 8;
454
+ n = 1;
455
+ break;
456
+ case 0:
457
+ g_assert_not_reached();
458
+ }
459
+ pv += n;
460
+ size -= n;
461
+ } while (size != 0);
462
+
463
+ return val_le;
464
+}
465
+
466
+/**
467
+ * store_whole_le4
468
+ * @pv: host address
469
+ * @size: number of bytes to store
470
+ * @val_le: data to store
471
+ *
472
+ * As store_bytes_leN, but atomically as a whole.
473
+ * Four aligned bytes are guaranteed to cover the store.
474
+ */
475
+static uint64_t store_whole_le4(void *pv, int size, uint64_t val_le)
476
+{
477
+ int sz = size * 8;
478
+ int o = (uintptr_t)pv & 3;
479
+ int sh = o * 8;
480
+ uint32_t m = MAKE_64BIT_MASK(0, sz);
481
+ uint32_t v;
482
+
483
+ if (HOST_BIG_ENDIAN) {
484
+ v = bswap32(val_le) >> sh;
485
+ m = bswap32(m) >> sh;
486
+ } else {
487
+ v = val_le << sh;
488
+ m <<= sh;
489
+ }
490
+ store_atom_insert_al4(pv - o, v, m);
491
+ return val_le >> sz;
492
+}
493
+
494
+/**
495
+ * store_whole_le8
496
+ * @pv: host address
497
+ * @size: number of bytes to store
498
+ * @val_le: data to store
499
+ *
500
+ * As store_bytes_leN, but atomically as a whole.
501
+ * Eight aligned bytes are guaranteed to cover the store.
502
+ */
503
+static uint64_t store_whole_le8(void *pv, int size, uint64_t val_le)
504
+{
505
+ int sz = size * 8;
506
+ int o = (uintptr_t)pv & 7;
507
+ int sh = o * 8;
508
+ uint64_t m = MAKE_64BIT_MASK(0, sz);
509
+ uint64_t v;
510
+
511
+ qemu_build_assert(HAVE_al8);
512
+ if (HOST_BIG_ENDIAN) {
513
+ v = bswap64(val_le) >> sh;
514
+ m = bswap64(m) >> sh;
515
+ } else {
516
+ v = val_le << sh;
517
+ m <<= sh;
518
+ }
519
+ store_atom_insert_al8(pv - o, v, m);
520
+ return val_le >> sz;
521
+}
522
+
523
+/**
524
+ * store_whole_le16
525
+ * @pv: host address
526
+ * @size: number of bytes to store
527
+ * @val_le: data to store
528
+ *
529
+ * As store_bytes_leN, but atomically as a whole.
530
+ * 16 aligned bytes are guaranteed to cover the store.
531
+ */
532
+static uint64_t store_whole_le16(void *pv, int size, Int128 val_le)
533
+{
534
+ int sz = size * 8;
535
+ int o = (uintptr_t)pv & 15;
536
+ int sh = o * 8;
537
+ Int128 m, v;
538
+
539
+ qemu_build_assert(HAVE_al16);
540
+
541
+ /* Like MAKE_64BIT_MASK(0, sz), but larger. */
542
+ if (sz <= 64) {
543
+ m = int128_make64(MAKE_64BIT_MASK(0, sz));
544
+ } else {
545
+ m = int128_make128(-1, MAKE_64BIT_MASK(0, sz - 64));
546
+ }
547
+
548
+ if (HOST_BIG_ENDIAN) {
549
+ v = int128_urshift(bswap128(val_le), sh);
550
+ m = int128_urshift(bswap128(m), sh);
551
+ } else {
552
+ v = int128_lshift(val_le, sh);
553
+ m = int128_lshift(m, sh);
554
+ }
555
+ store_atom_insert_al16(pv - o, v, m);
556
+
557
+ /* Unused if sz <= 64. */
558
+ return int128_gethi(val_le) >> (sz - 64);
559
+}
560
+
561
+/**
562
+ * store_atom_2:
563
+ * @p: host address
564
+ * @val: the value to store
565
+ * @memop: the full memory op
566
+ *
567
+ * Store 2 bytes to @p, honoring the atomicity of @memop.
568
+ */
569
+static void store_atom_2(CPUArchState *env, uintptr_t ra,
570
+ void *pv, MemOp memop, uint16_t val)
571
+{
572
+ uintptr_t pi = (uintptr_t)pv;
573
+ int atmax;
574
+
575
+ if (likely((pi & 1) == 0)) {
576
+ store_atomic2(pv, val);
577
+ return;
578
+ }
579
+
580
+ atmax = required_atomicity(env, pi, memop);
581
+ if (atmax == MO_8) {
582
+ stw_he_p(pv, val);
583
+ return;
584
+ }
585
+
586
+ /*
587
+ * The only case remaining is MO_ATOM_WITHIN16.
588
+ * Big or little endian, we want the middle two bytes in each test.
589
+ */
590
+ if ((pi & 3) == 1) {
591
+ store_atom_insert_al4(pv - 1, (uint32_t)val << 8, MAKE_64BIT_MASK(8, 16));
592
+ return;
593
+ } else if ((pi & 7) == 3) {
594
+ if (HAVE_al8) {
595
+ store_atom_insert_al8(pv - 3, (uint64_t)val << 24, MAKE_64BIT_MASK(24, 16));
596
+ return;
597
+ }
598
+ } else if ((pi & 15) == 7) {
599
+ if (HAVE_al16) {
600
+ Int128 v = int128_lshift(int128_make64(val), 56);
601
+ Int128 m = int128_lshift(int128_make64(0xffff), 56);
602
+ store_atom_insert_al16(pv - 7, v, m);
603
+ return;
604
+ }
605
+ } else {
606
+ g_assert_not_reached();
607
+ }
608
+
609
+ cpu_loop_exit_atomic(env_cpu(env), ra);
610
+}
611
+
612
+/**
613
+ * store_atom_4:
614
+ * @p: host address
615
+ * @val: the value to store
616
+ * @memop: the full memory op
617
+ *
618
+ * Store 4 bytes to @p, honoring the atomicity of @memop.
619
+ */
620
+static void store_atom_4(CPUArchState *env, uintptr_t ra,
621
+ void *pv, MemOp memop, uint32_t val)
622
+{
623
+ uintptr_t pi = (uintptr_t)pv;
624
+ int atmax;
625
+
626
+ if (likely((pi & 3) == 0)) {
627
+ store_atomic4(pv, val);
628
+ return;
629
+ }
630
+
631
+ atmax = required_atomicity(env, pi, memop);
632
+ switch (atmax) {
633
+ case MO_8:
634
+ stl_he_p(pv, val);
635
+ return;
636
+ case MO_16:
637
+ store_atom_4_by_2(pv, val);
638
+ return;
639
+ case -MO_16:
640
+ {
641
+ uint32_t val_le = cpu_to_le32(val);
642
+ int s2 = pi & 3;
643
+ int s1 = 4 - s2;
644
+
645
+ switch (s2) {
646
+ case 1:
647
+ val_le = store_whole_le4(pv, s1, val_le);
648
+ *(uint8_t *)(pv + 3) = val_le;
649
+ break;
650
+ case 3:
651
+ *(uint8_t *)pv = val_le;
652
+ store_whole_le4(pv + 1, s2, val_le >> 8);
653
+ break;
654
+ case 0: /* aligned */
655
+ case 2: /* atmax MO_16 */
656
+ default:
657
+ g_assert_not_reached();
208
+ }
658
+ }
209
+ }
659
+ }
210
+ }
660
+ return;
211
+ r0 -= m;
661
+ case MO_32:
212
+
662
+ if ((pi & 7) < 4) {
213
+ *r = r0;
663
+ if (HAVE_al8) {
214
+ return (q1 << 32) | q0;
664
+ store_whole_le8(pv, 4, cpu_to_le32(val));
215
+#endif
665
+ return;
216
+}
666
+ }
217
+
667
+ } else {
218
#endif
668
+ if (HAVE_al16) {
669
+ store_whole_le16(pv, 4, int128_make64(cpu_to_le32(val)));
670
+ return;
671
+ }
672
+ }
673
+ cpu_loop_exit_atomic(env_cpu(env), ra);
674
+ default:
675
+ g_assert_not_reached();
676
+ }
677
+}
678
+
679
+/**
680
+ * store_atom_8:
681
+ * @p: host address
682
+ * @val: the value to store
683
+ * @memop: the full memory op
684
+ *
685
+ * Store 8 bytes to @p, honoring the atomicity of @memop.
686
+ */
687
+static void store_atom_8(CPUArchState *env, uintptr_t ra,
688
+ void *pv, MemOp memop, uint64_t val)
689
+{
690
+ uintptr_t pi = (uintptr_t)pv;
691
+ int atmax;
692
+
693
+ if (HAVE_al8 && likely((pi & 7) == 0)) {
694
+ store_atomic8(pv, val);
695
+ return;
696
+ }
697
+
698
+ atmax = required_atomicity(env, pi, memop);
699
+ switch (atmax) {
700
+ case MO_8:
701
+ stq_he_p(pv, val);
702
+ return;
703
+ case MO_16:
704
+ store_atom_8_by_2(pv, val);
705
+ return;
706
+ case MO_32:
707
+ store_atom_8_by_4(pv, val);
708
+ return;
709
+ case -MO_32:
710
+ if (HAVE_al8) {
711
+ uint64_t val_le = cpu_to_le64(val);
712
+ int s2 = pi & 7;
713
+ int s1 = 8 - s2;
714
+
715
+ switch (s2) {
716
+ case 1 ... 3:
717
+ val_le = store_whole_le8(pv, s1, val_le);
718
+ store_bytes_leN(pv + s1, s2, val_le);
719
+ break;
720
+ case 5 ... 7:
721
+ val_le = store_bytes_leN(pv, s1, val_le);
722
+ store_whole_le8(pv + s1, s2, val_le);
723
+ break;
724
+ case 0: /* aligned */
725
+ case 4: /* atmax MO_32 */
726
+ default:
727
+ g_assert_not_reached();
728
+ }
729
+ return;
730
+ }
731
+ break;
732
+ case MO_64:
733
+ if (HAVE_al16) {
734
+ store_whole_le16(pv, 8, int128_make64(cpu_to_le64(val)));
735
+ return;
736
+ }
737
+ break;
738
+ default:
739
+ g_assert_not_reached();
740
+ }
741
+ cpu_loop_exit_atomic(env_cpu(env), ra);
742
+}
--
2.25.1

--
2.34.1
Certain targets, like riscv, produce signed 32-bit results.
This can lead to lots of redundant extensions as values are
manipulated.

Begin by tracking only the obvious sign-extensions, and
converting them to simple copies when possible.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 123 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 102 insertions(+), 21 deletions(-)

With the current structure of cputlb.c, there is no difference
between the little-endian and big-endian entry points, aside
from the assert. Unify the pairs of functions.

Hoist the qemu_{ld,st}_helpers arrays to tcg.c.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 docs/devel/loads-stores.rst      |  36 ++----
 include/tcg/tcg-ldst.h           |  60 ++++------
 accel/tcg/cputlb.c               | 190 ++++++++++---------------------
 tcg/tcg.c                        |  21 ++++
 tcg/tci.c                        |  61 ++++------
 tcg/aarch64/tcg-target.c.inc     |  33 ------
 tcg/arm/tcg-target.c.inc         |  37 ------
 tcg/i386/tcg-target.c.inc        |  30 +----
 tcg/loongarch64/tcg-target.c.inc |  23 ----
 tcg/mips/tcg-target.c.inc        |  31 -----
 tcg/ppc/tcg-target.c.inc         |  30 +----
 tcg/riscv/tcg-target.c.inc       |  42 -------
 tcg/s390x/tcg-target.c.inc       |  31 +----
 tcg/sparc64/tcg-target.c.inc     |  32 +-----
 14 files changed, 146 insertions(+), 511 deletions(-)
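A side note on the s_mask tracking added by the tcg/optimize.c patch above:
s_mask is a left-aligned mask covering the clrsb() repetitions of the sign
bit, so a value whose mask already reaches below bit 31 cannot be changed by
ext32s_i64 and the extension can become a simple copy. The sketch below
mirrors smask_from_value() from the patch; the redundancy test is a
simplified illustration, not the exact condition used in the folding code.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same formula as the patch: a left-aligned mask of clrsb(value) bits. */
static uint64_t smask_from_value(uint64_t value)
{
    int rep = __builtin_clrsbll(value);    /* clrsb64() in QEMU; GCC/Clang builtin */
    return ~(~0ull >> rep);
}

/* ext32s_i64 is a no-op iff bits 63..31 are already copies of each other. */
static bool ext32s_is_redundant(uint64_t value)
{
    return smask_from_value(value) >= 0xffffffff00000000ull;
}

int main(void)
{
    static const uint64_t samples[] = {
        0xffffffff80001234ull,   /* already sign-extended from 32 bits */
        0x0000000000001234ull,   /* small positive value */
        0x0000000180001234ull,   /* bit 31 set, upper half not sign-extended */
    };

    for (int i = 0; i < 3; i++) {
        uint64_t v = samples[i];
        printf("v=0x%016" PRIx64 " s_mask=0x%016" PRIx64 " ext32s redundant: %s\n",
               v, smask_from_value(v),
               ext32s_is_redundant(v) ? "yes" : "no");
    }
    return 0;
}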
diff --git a/tcg/optimize.c b/tcg/optimize.c
27
diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst
16
index XXXXXXX..XXXXXXX 100644
28
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/optimize.c
29
--- a/docs/devel/loads-stores.rst
18
+++ b/tcg/optimize.c
30
+++ b/docs/devel/loads-stores.rst
19
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
31
@@ -XXX,XX +XXX,XX @@ swap: ``translator_ld{sign}{size}_swap(env, ptr, swap)``
20
TCGTemp *next_copy;
32
Regexes for git grep
21
uint64_t val;
33
- ``\<translator_ld[us]\?[bwlq]\(_swap\)\?\>``
22
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
34
23
+ uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
35
-``helper_*_{ld,st}*_mmu``
24
} TempOptInfo;
36
+``helper_{ld,st}*_mmu``
25
37
~~~~~~~~~~~~~~~~~~~~~~~~~
26
typedef struct OptContext {
38
27
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
39
These functions are intended primarily to be called by the code
28
/* In flight values from optimization. */
40
-generated by the TCG backend. They may also be called by target
29
uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
41
-CPU helper function code. Like the ``cpu_{ld,st}_mmuidx_ra`` functions
30
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
42
-they perform accesses by guest virtual address, with a given ``mmuidx``.
31
+ uint64_t s_mask; /* mask of clrsb(value) bits */
43
+generated by the TCG backend. Like the ``cpu_{ld,st}_mmu`` functions
32
TCGType type;
44
+they perform accesses by guest virtual address, with a given ``MemOpIdx``.
33
} OptContext;
45
34
46
-These functions specify an ``opindex`` parameter which encodes
35
+/* Calculate the smask for a specific value. */
47
-(among other things) the mmu index to use for the access. This parameter
36
+static uint64_t smask_from_value(uint64_t value)
48
-should be created by calling ``make_memop_idx()``.
37
+{
49
+They differ from ``cpu_{ld,st}_mmu`` in that they take the endianness
38
+ int rep = clrsb64(value);
50
+of the operation only from the MemOpIdx, and loads extend the return
39
+ return ~(~0ull >> rep);
51
+value to the size of a host general register (``tcg_target_ulong``).
40
+}
52
53
-The ``retaddr`` parameter should be the result of GETPC() called directly
54
-from the top level HELPER(foo) function (or 0 if no guest CPU state
55
-unwinding is required).
56
+load: ``helper_ld{sign}{size}_mmu(env, addr, opindex, retaddr)``
57
58
-**TODO** The names of these functions are a bit odd for historical
59
-reasons because they were originally expected to be called only from
60
-within generated code. We should rename them to bring them more in
61
-line with the other memory access functions. The explicit endianness
62
-is the only feature they have beyond ``*_mmuidx_ra``.
63
-
64
-load: ``helper_{endian}_ld{sign}{size}_mmu(env, addr, opindex, retaddr)``
65
-
66
-store: ``helper_{endian}_st{size}_mmu(env, addr, val, opindex, retaddr)``
67
+store: ``helper_{size}_mmu(env, addr, val, opindex, retaddr)``
68
69
``sign``
70
- (empty) : for 32 or 64 bit sizes
71
@@ -XXX,XX +XXX,XX @@ store: ``helper_{endian}_st{size}_mmu(env, addr, val, opindex, retaddr)``
72
- ``l`` : 32 bits
73
- ``q`` : 64 bits
74
75
-``endian``
76
- - ``le`` : little endian
77
- - ``be`` : big endian
78
- - ``ret`` : target endianness
79
-
80
Regexes for git grep
81
- - ``\<helper_\(le\|be\|ret\)_ld[us]\?[bwlq]_mmu\>``
82
- - ``\<helper_\(le\|be\|ret\)_st[bwlq]_mmu\>``
83
+ - ``\<helper_ld[us]\?[bwlq]_mmu\>``
84
+ - ``\<helper_st[bwlq]_mmu\>``
85
86
``address_space_*``
87
~~~~~~~~~~~~~~~~~~~
88
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
89
index XXXXXXX..XXXXXXX 100644
90
--- a/include/tcg/tcg-ldst.h
91
+++ b/include/tcg/tcg-ldst.h
92
@@ -XXX,XX +XXX,XX @@
93
#ifdef CONFIG_SOFTMMU
94
95
/* Value zero-extended to tcg register size. */
96
-tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
97
- MemOpIdx oi, uintptr_t retaddr);
98
-tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
99
- MemOpIdx oi, uintptr_t retaddr);
100
-tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
101
- MemOpIdx oi, uintptr_t retaddr);
102
-uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
103
- MemOpIdx oi, uintptr_t retaddr);
104
-tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
105
- MemOpIdx oi, uintptr_t retaddr);
106
-tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
107
- MemOpIdx oi, uintptr_t retaddr);
108
-uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
109
- MemOpIdx oi, uintptr_t retaddr);
110
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
111
+ MemOpIdx oi, uintptr_t retaddr);
112
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
113
+ MemOpIdx oi, uintptr_t retaddr);
114
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
115
+ MemOpIdx oi, uintptr_t retaddr);
116
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
117
+ MemOpIdx oi, uintptr_t retaddr);
118
119
/* Value sign-extended to tcg register size. */
120
-tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
121
- MemOpIdx oi, uintptr_t retaddr);
122
-tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
123
- MemOpIdx oi, uintptr_t retaddr);
124
-tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
125
- MemOpIdx oi, uintptr_t retaddr);
126
-tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
127
- MemOpIdx oi, uintptr_t retaddr);
128
-tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
129
- MemOpIdx oi, uintptr_t retaddr);
130
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
131
+ MemOpIdx oi, uintptr_t retaddr);
132
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
133
+ MemOpIdx oi, uintptr_t retaddr);
134
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
135
+ MemOpIdx oi, uintptr_t retaddr);
136
137
/*
138
* Value extended to at least uint32_t, so that some ABIs do not require
139
* zero-extension from uint8_t or uint16_t.
140
*/
141
-void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
142
- MemOpIdx oi, uintptr_t retaddr);
143
-void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
144
- MemOpIdx oi, uintptr_t retaddr);
145
-void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
146
- MemOpIdx oi, uintptr_t retaddr);
147
-void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
148
- MemOpIdx oi, uintptr_t retaddr);
149
-void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
150
- MemOpIdx oi, uintptr_t retaddr);
151
-void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
152
- MemOpIdx oi, uintptr_t retaddr);
153
-void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
154
- MemOpIdx oi, uintptr_t retaddr);
155
+void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
156
+ MemOpIdx oi, uintptr_t retaddr);
157
+void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
158
+ MemOpIdx oi, uintptr_t retaddr);
159
+void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
160
+ MemOpIdx oi, uintptr_t retaddr);
161
+void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
162
+ MemOpIdx oi, uintptr_t retaddr);
163
164
#else
165
166
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
167
index XXXXXXX..XXXXXXX 100644
168
--- a/accel/tcg/cputlb.c
169
+++ b/accel/tcg/cputlb.c
170
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
171
cpu_loop_exit_atomic(env_cpu(env), retaddr);
172
}
173
174
-/*
175
- * Verify that we have passed the correct MemOp to the correct function.
176
- *
177
- * In the case of the helper_*_mmu functions, we will have done this by
178
- * using the MemOp to look up the helper during code generation.
179
- *
180
- * In the case of the cpu_*_mmu functions, this is up to the caller.
181
- * We could present one function to target code, and dispatch based on
182
- * the MemOp, but so far we have worked hard to avoid an indirect function
183
- * call along the memory path.
184
- */
185
-static void validate_memop(MemOpIdx oi, MemOp expected)
186
-{
187
-#ifdef CONFIG_DEBUG_TCG
188
- MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP);
189
- assert(have == expected);
190
-#endif
191
-}
192
-
193
/*
194
* Load Helpers
195
*
196
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
197
return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
198
}
199
200
-tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
201
- MemOpIdx oi, uintptr_t retaddr)
202
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
203
+ MemOpIdx oi, uintptr_t retaddr)
204
{
205
- validate_memop(oi, MO_UB);
206
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
207
return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
208
}
209
210
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
211
return ret;
212
}
213
214
-tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
215
- MemOpIdx oi, uintptr_t retaddr)
216
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
217
+ MemOpIdx oi, uintptr_t retaddr)
218
{
219
- validate_memop(oi, MO_LEUW);
220
- return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
221
-}
222
-
223
-tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
224
- MemOpIdx oi, uintptr_t retaddr)
225
-{
226
- validate_memop(oi, MO_BEUW);
227
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
228
return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
229
}
230
231
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
232
return ret;
233
}
234
235
-tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
236
- MemOpIdx oi, uintptr_t retaddr)
237
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
238
+ MemOpIdx oi, uintptr_t retaddr)
239
{
240
- validate_memop(oi, MO_LEUL);
241
- return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
242
-}
243
-
244
-tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
245
- MemOpIdx oi, uintptr_t retaddr)
246
-{
247
- validate_memop(oi, MO_BEUL);
248
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
249
return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
250
}
251
252
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
253
return ret;
254
}
255
256
-uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
257
- MemOpIdx oi, uintptr_t retaddr)
258
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
259
+ MemOpIdx oi, uintptr_t retaddr)
260
{
261
- validate_memop(oi, MO_LEUQ);
262
- return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
263
-}
264
-
265
-uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
266
- MemOpIdx oi, uintptr_t retaddr)
267
-{
268
- validate_memop(oi, MO_BEUQ);
269
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
270
return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
271
}
272
273
@@ -XXX,XX +XXX,XX @@ uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
274
* avoid this for 64-bit data, or for 32-bit data on 32-bit host.
275
*/
276
277
-
278
-tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
279
- MemOpIdx oi, uintptr_t retaddr)
280
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
281
+ MemOpIdx oi, uintptr_t retaddr)
282
{
283
- return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
284
+ return (int8_t)helper_ldub_mmu(env, addr, oi, retaddr);
285
}
286
287
-tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
288
- MemOpIdx oi, uintptr_t retaddr)
289
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
290
+ MemOpIdx oi, uintptr_t retaddr)
291
{
292
- return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
293
+ return (int16_t)helper_lduw_mmu(env, addr, oi, retaddr);
294
}
295
296
-tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
297
- MemOpIdx oi, uintptr_t retaddr)
298
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
299
+ MemOpIdx oi, uintptr_t retaddr)
300
{
301
- return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
302
-}
303
-
304
-tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
305
- MemOpIdx oi, uintptr_t retaddr)
306
-{
307
- return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
308
-}
309
-
310
-tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
311
- MemOpIdx oi, uintptr_t retaddr)
312
-{
313
- return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
314
+ return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
315
}
316
317
/*
318
@@ -XXX,XX +XXX,XX @@ uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
319
{
320
uint8_t ret;
321
322
- validate_memop(oi, MO_UB);
323
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_UB);
324
ret = do_ld1_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
325
plugin_load_cb(env, addr, oi);
326
return ret;
327
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
328
{
329
uint16_t ret;
330
331
- validate_memop(oi, MO_BEUW);
332
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW);
333
ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
334
plugin_load_cb(env, addr, oi);
335
return ret;
336
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
337
{
338
uint32_t ret;
339
340
- validate_memop(oi, MO_BEUL);
341
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL);
342
ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
343
plugin_load_cb(env, addr, oi);
344
return ret;
345
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
346
{
347
uint64_t ret;
348
349
- validate_memop(oi, MO_BEUQ);
350
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ);
351
ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
352
plugin_load_cb(env, addr, oi);
353
return ret;
354
@@ -XXX,XX +XXX,XX @@ uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
355
{
356
uint16_t ret;
357
358
- validate_memop(oi, MO_LEUW);
359
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
360
ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
361
plugin_load_cb(env, addr, oi);
362
return ret;
363
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
364
{
365
uint32_t ret;
366
367
- validate_memop(oi, MO_LEUL);
368
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
369
ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
370
plugin_load_cb(env, addr, oi);
371
return ret;
372
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
373
{
374
uint64_t ret;
375
376
- validate_memop(oi, MO_LEUQ);
377
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
378
ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
379
plugin_load_cb(env, addr, oi);
380
return ret;
381
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
382
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
383
new_oi = make_memop_idx(mop, mmu_idx);
384
385
- h = helper_be_ldq_mmu(env, addr, new_oi, ra);
386
- l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra);
387
+ h = helper_ldq_mmu(env, addr, new_oi, ra);
388
+ l = helper_ldq_mmu(env, addr + 8, new_oi, ra);
389
390
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
391
return int128_make128(l, h);
392
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
393
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
394
new_oi = make_memop_idx(mop, mmu_idx);
395
396
- l = helper_le_ldq_mmu(env, addr, new_oi, ra);
397
- h = helper_le_ldq_mmu(env, addr + 8, new_oi, ra);
398
+ l = helper_ldq_mmu(env, addr, new_oi, ra);
399
+ h = helper_ldq_mmu(env, addr + 8, new_oi, ra);
400
401
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
402
return int128_make128(l, h);
403
@@ -XXX,XX +XXX,XX @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
404
}
405
}
406
407
-void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
408
- MemOpIdx oi, uintptr_t ra)
409
+void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
410
+ MemOpIdx oi, uintptr_t ra)
411
{
412
MMULookupLocals l;
413
bool crosspage;
414
415
- validate_memop(oi, MO_UB);
416
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
417
crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
418
tcg_debug_assert(!crosspage);
419
420
@@ -XXX,XX +XXX,XX @@ static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
421
do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
422
}
423
424
-void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
425
- MemOpIdx oi, uintptr_t retaddr)
426
+void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
427
+ MemOpIdx oi, uintptr_t retaddr)
428
{
429
- validate_memop(oi, MO_LEUW);
430
- do_st2_mmu(env, addr, val, oi, retaddr);
431
-}
432
-
433
-void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
434
- MemOpIdx oi, uintptr_t retaddr)
435
-{
436
- validate_memop(oi, MO_BEUW);
437
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
438
do_st2_mmu(env, addr, val, oi, retaddr);
439
}
440
441
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
442
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
443
}
444
445
-void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
446
- MemOpIdx oi, uintptr_t retaddr)
447
+void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
448
+ MemOpIdx oi, uintptr_t retaddr)
449
{
450
- validate_memop(oi, MO_LEUL);
451
- do_st4_mmu(env, addr, val, oi, retaddr);
452
-}
453
-
454
-void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
455
- MemOpIdx oi, uintptr_t retaddr)
456
-{
457
- validate_memop(oi, MO_BEUL);
458
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
459
do_st4_mmu(env, addr, val, oi, retaddr);
460
}
461
462
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
463
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
464
}
465
466
-void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
467
- MemOpIdx oi, uintptr_t retaddr)
468
+void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
469
+ MemOpIdx oi, uintptr_t retaddr)
470
{
471
- validate_memop(oi, MO_LEUQ);
472
- do_st8_mmu(env, addr, val, oi, retaddr);
473
-}
474
-
475
-void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
476
- MemOpIdx oi, uintptr_t retaddr)
477
-{
478
- validate_memop(oi, MO_BEUQ);
479
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
480
do_st8_mmu(env, addr, val, oi, retaddr);
481
}
482
483
@@ -XXX,XX +XXX,XX @@ static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
484
void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
485
MemOpIdx oi, uintptr_t retaddr)
486
{
487
- helper_ret_stb_mmu(env, addr, val, oi, retaddr);
488
+ helper_stb_mmu(env, addr, val, oi, retaddr);
489
plugin_store_cb(env, addr, oi);
490
}
491
492
void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
493
MemOpIdx oi, uintptr_t retaddr)
494
{
495
- helper_be_stw_mmu(env, addr, val, oi, retaddr);
496
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUW);
497
+ do_st2_mmu(env, addr, val, oi, retaddr);
498
plugin_store_cb(env, addr, oi);
499
}
500
501
void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
502
MemOpIdx oi, uintptr_t retaddr)
503
{
504
- helper_be_stl_mmu(env, addr, val, oi, retaddr);
505
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUL);
506
+ do_st4_mmu(env, addr, val, oi, retaddr);
507
plugin_store_cb(env, addr, oi);
508
}
509
510
void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
511
MemOpIdx oi, uintptr_t retaddr)
512
{
513
- helper_be_stq_mmu(env, addr, val, oi, retaddr);
514
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_BEUQ);
515
+ do_st8_mmu(env, addr, val, oi, retaddr);
516
plugin_store_cb(env, addr, oi);
517
}
518
519
void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
520
MemOpIdx oi, uintptr_t retaddr)
521
{
522
- helper_le_stw_mmu(env, addr, val, oi, retaddr);
523
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUW);
524
+ do_st2_mmu(env, addr, val, oi, retaddr);
525
plugin_store_cb(env, addr, oi);
526
}
527
528
void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
529
MemOpIdx oi, uintptr_t retaddr)
530
{
531
- helper_le_stl_mmu(env, addr, val, oi, retaddr);
532
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUL);
533
+ do_st4_mmu(env, addr, val, oi, retaddr);
534
plugin_store_cb(env, addr, oi);
535
}
536
537
void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
538
MemOpIdx oi, uintptr_t retaddr)
539
{
540
- helper_le_stq_mmu(env, addr, val, oi, retaddr);
541
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == MO_LEUQ);
542
+ do_st8_mmu(env, addr, val, oi, retaddr);
543
plugin_store_cb(env, addr, oi);
544
}
545
546
@@ -XXX,XX +XXX,XX @@ void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
547
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
548
new_oi = make_memop_idx(mop, mmu_idx);
549
550
- helper_be_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
551
- helper_be_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
552
+ helper_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
553
+ helper_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
554
555
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
556
}
557
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
558
mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
559
new_oi = make_memop_idx(mop, mmu_idx);
560
561
- helper_le_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
562
- helper_le_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
563
+ helper_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
564
+ helper_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
565
566
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
567
}
568
diff --git a/tcg/tcg.c b/tcg/tcg.c
569
index XXXXXXX..XXXXXXX 100644
570
--- a/tcg/tcg.c
571
+++ b/tcg/tcg.c
572
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
573
const TCGLdstHelperParam *p)
574
__attribute__((unused));
575
576
+#ifdef CONFIG_SOFTMMU
577
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
578
+ [MO_UB] = helper_ldub_mmu,
579
+ [MO_SB] = helper_ldsb_mmu,
580
+ [MO_UW] = helper_lduw_mmu,
581
+ [MO_SW] = helper_ldsw_mmu,
582
+ [MO_UL] = helper_ldul_mmu,
583
+ [MO_UQ] = helper_ldq_mmu,
584
+#if TCG_TARGET_REG_BITS == 64
585
+ [MO_SL] = helper_ldsl_mmu,
586
+#endif
587
+};
41
+
588
+
42
+/*
589
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
43
+ * Calculate the smask for a given set of known-zeros.
590
+ [MO_8] = helper_stb_mmu,
44
+ * If there are lots of zeros on the left, we can consider the remainder
591
+ [MO_16] = helper_stw_mmu,
45
+ * an unsigned field, and thus the corresponding signed field is one bit
592
+ [MO_32] = helper_stl_mmu,
46
+ * larger.
593
+ [MO_64] = helper_stq_mmu,
47
+ */
594
+};
48
+static uint64_t smask_from_zmask(uint64_t zmask)
595
+#endif
49
+{
50
+ /*
51
+ * Only the 0 bits are significant for zmask, thus the msb itself
52
+ * must be zero, else we have no sign information.
53
+ */
54
+ int rep = clz64(zmask);
55
+ if (rep == 0) {
56
+ return 0;
57
+ }
58
+ rep -= 1;
59
+ return ~(~0ull >> rep);
60
+}
61
+
596
+
62
static inline TempOptInfo *ts_info(TCGTemp *ts)
597
TCGContext tcg_init_ctx;
63
{
598
__thread TCGContext *tcg_ctx;
64
return ts->state_ptr;
599
65
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
600
diff --git a/tcg/tci.c b/tcg/tci.c
66
ti->prev_copy = ts;
601
index XXXXXXX..XXXXXXX 100644
67
ti->is_const = false;
602
--- a/tcg/tci.c
68
ti->z_mask = -1;
603
+++ b/tcg/tci.c
69
+ ti->s_mask = 0;
604
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
70
}
605
uintptr_t ra = (uintptr_t)tb_ptr;
71
606
72
static void reset_temp(TCGArg arg)
607
#ifdef CONFIG_SOFTMMU
73
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
608
- switch (mop & (MO_BSWAP | MO_SSIZE)) {
74
ti->is_const = true;
609
+ switch (mop & MO_SSIZE) {
75
ti->val = ts->val;
610
case MO_UB:
76
ti->z_mask = ts->val;
611
- return helper_ret_ldub_mmu(env, taddr, oi, ra);
77
+ ti->s_mask = smask_from_value(ts->val);
612
+ return helper_ldub_mmu(env, taddr, oi, ra);
78
} else {
613
case MO_SB:
79
ti->is_const = false;
614
- return helper_ret_ldsb_mmu(env, taddr, oi, ra);
80
ti->z_mask = -1;
615
- case MO_LEUW:
81
+ ti->s_mask = 0;
616
- return helper_le_lduw_mmu(env, taddr, oi, ra);
82
}
617
- case MO_LESW:
83
}
618
- return helper_le_ldsw_mmu(env, taddr, oi, ra);
84
619
- case MO_LEUL:
85
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
620
- return helper_le_ldul_mmu(env, taddr, oi, ra);
86
op->args[1] = src;
621
- case MO_LESL:
87
622
- return helper_le_ldsl_mmu(env, taddr, oi, ra);
88
di->z_mask = si->z_mask;
623
- case MO_LEUQ:
89
+ di->s_mask = si->s_mask;
624
- return helper_le_ldq_mmu(env, taddr, oi, ra);
90
625
- case MO_BEUW:
91
if (src_ts->type == dst_ts->type) {
626
- return helper_be_lduw_mmu(env, taddr, oi, ra);
92
TempOptInfo *ni = ts_info(si->next_copy);
627
- case MO_BESW:
93
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
628
- return helper_be_ldsw_mmu(env, taddr, oi, ra);
94
629
- case MO_BEUL:
95
nb_oargs = def->nb_oargs;
630
- return helper_be_ldul_mmu(env, taddr, oi, ra);
96
for (i = 0; i < nb_oargs; i++) {
631
- case MO_BESL:
97
- reset_temp(op->args[i]);
632
- return helper_be_ldsl_mmu(env, taddr, oi, ra);
98
+ TCGTemp *ts = arg_temp(op->args[i]);
633
- case MO_BEUQ:
99
+ reset_ts(ts);
634
- return helper_be_ldq_mmu(env, taddr, oi, ra);
100
/*
635
+ return helper_ldsb_mmu(env, taddr, oi, ra);
101
- * Save the corresponding known-zero bits mask for the
636
+ case MO_UW:
102
+ * Save the corresponding known-zero/sign bits mask for the
637
+ return helper_lduw_mmu(env, taddr, oi, ra);
103
* first output argument (only one supported so far).
638
+ case MO_SW:
104
*/
639
+ return helper_ldsw_mmu(env, taddr, oi, ra);
105
if (i == 0) {
640
+ case MO_UL:
106
- arg_info(op->args[i])->z_mask = ctx->z_mask;
641
+ return helper_ldul_mmu(env, taddr, oi, ra);
107
+ ts_info(ts)->z_mask = ctx->z_mask;
642
+ case MO_SL:
108
+ ts_info(ts)->s_mask = ctx->s_mask;
643
+ return helper_ldsl_mmu(env, taddr, oi, ra);
109
}
644
+ case MO_UQ:
110
}
645
+ return helper_ldq_mmu(env, taddr, oi, ra);
111
}
112
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
113
{
114
uint64_t a_mask = ctx->a_mask;
115
uint64_t z_mask = ctx->z_mask;
116
+ uint64_t s_mask = ctx->s_mask;
117
118
/*
119
* 32-bit ops generate 32-bit results, which for the purpose of
120
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
121
if (ctx->type == TCG_TYPE_I32) {
122
a_mask = (int32_t)a_mask;
123
z_mask = (int32_t)z_mask;
124
+ s_mask |= MAKE_64BIT_MASK(32, 32);
125
ctx->z_mask = z_mask;
126
+ ctx->s_mask = s_mask;
127
}
128
129
if (z_mask == 0) {
130
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
131
132
static bool fold_bswap(OptContext *ctx, TCGOp *op)
133
{
134
- uint64_t z_mask, sign;
135
+ uint64_t z_mask, s_mask, sign;
136
137
if (arg_is_const(op->args[1])) {
138
uint64_t t = arg_info(op->args[1])->val;
139
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
140
}
141
142
z_mask = arg_info(op->args[1])->z_mask;
143
+
144
switch (op->opc) {
145
case INDEX_op_bswap16_i32:
146
case INDEX_op_bswap16_i64:
147
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
148
default:
646
default:
149
g_assert_not_reached();
647
g_assert_not_reached();
150
}
648
}
151
+ s_mask = smask_from_zmask(z_mask);
649
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
152
650
uintptr_t ra = (uintptr_t)tb_ptr;
153
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
651
154
case TCG_BSWAP_OZ:
652
#ifdef CONFIG_SOFTMMU
155
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
653
- switch (mop & (MO_BSWAP | MO_SIZE)) {
156
/* If the sign bit may be 1, force all the bits above to 1. */
654
+ switch (mop & MO_SIZE) {
157
if (z_mask & sign) {
655
case MO_UB:
158
z_mask |= sign;
656
- helper_ret_stb_mmu(env, taddr, val, oi, ra);
159
+ s_mask = sign << 1;
657
+ helper_stb_mmu(env, taddr, val, oi, ra);
160
}
161
break;
658
break;
162
default:
659
- case MO_LEUW:
163
/* The high bits are undefined: force all bits above the sign to 1. */
660
- helper_le_stw_mmu(env, taddr, val, oi, ra);
164
z_mask |= sign << 1;
661
+ case MO_UW:
165
+ s_mask = 0;
662
+ helper_stw_mmu(env, taddr, val, oi, ra);
166
break;
663
break;
167
}
664
- case MO_LEUL:
168
ctx->z_mask = z_mask;
665
- helper_le_stl_mmu(env, taddr, val, oi, ra);
169
+ ctx->s_mask = s_mask;
666
+ case MO_UL:
170
667
+ helper_stl_mmu(env, taddr, val, oi, ra);
171
return fold_masks(ctx, op);
172
}
173
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
174
static bool fold_extract(OptContext *ctx, TCGOp *op)
175
{
176
uint64_t z_mask_old, z_mask;
177
+ int pos = op->args[2];
178
+ int len = op->args[3];
179
180
if (arg_is_const(op->args[1])) {
181
uint64_t t;
182
183
t = arg_info(op->args[1])->val;
184
- t = extract64(t, op->args[2], op->args[3]);
185
+ t = extract64(t, pos, len);
186
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
187
}
188
189
z_mask_old = arg_info(op->args[1])->z_mask;
190
- z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
191
- if (op->args[2] == 0) {
192
+ z_mask = extract64(z_mask_old, pos, len);
193
+ if (pos == 0) {
194
ctx->a_mask = z_mask_old ^ z_mask;
195
}
196
ctx->z_mask = z_mask;
197
+ ctx->s_mask = smask_from_zmask(z_mask);
198
199
return fold_masks(ctx, op);
200
}
201
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
202
203
static bool fold_exts(OptContext *ctx, TCGOp *op)
204
{
205
- uint64_t z_mask_old, z_mask, sign;
206
+ uint64_t s_mask_old, s_mask, z_mask, sign;
207
bool type_change = false;
208
209
if (fold_const1(ctx, op)) {
210
return true;
211
}
212
213
- z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
214
+ z_mask = arg_info(op->args[1])->z_mask;
215
+ s_mask = arg_info(op->args[1])->s_mask;
216
+ s_mask_old = s_mask;
217
218
switch (op->opc) {
219
CASE_OP_32_64(ext8s):
220
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
221
222
if (z_mask & sign) {
223
z_mask |= sign;
224
- } else if (!type_change) {
225
- ctx->a_mask = z_mask_old ^ z_mask;
226
}
227
+ s_mask |= sign << 1;
228
+
229
ctx->z_mask = z_mask;
230
+ ctx->s_mask = s_mask;
231
+ if (!type_change) {
232
+ ctx->a_mask = s_mask & ~s_mask_old;
233
+ }
234
235
return fold_masks(ctx, op);
236
}
237
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
238
}
239
240
ctx->z_mask = z_mask;
241
+ ctx->s_mask = smask_from_zmask(z_mask);
242
if (!type_change) {
243
ctx->a_mask = z_mask_old ^ z_mask;
244
}
245
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
246
MemOp mop = get_memop(oi);
247
int width = 8 * memop_size(mop);
248
249
- if (!(mop & MO_SIGN) && width < 64) {
250
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
251
+ if (width < 64) {
252
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
253
+ if (!(mop & MO_SIGN)) {
254
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
255
+ ctx->s_mask <<= 1;
256
+ }
257
}
258
259
/* Opcodes that touch guest memory stop the mb optimization. */
260
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
261
262
static bool fold_sextract(OptContext *ctx, TCGOp *op)
263
{
264
- int64_t z_mask_old, z_mask;
265
+ uint64_t z_mask, s_mask, s_mask_old;
266
+ int pos = op->args[2];
267
+ int len = op->args[3];
268
269
if (arg_is_const(op->args[1])) {
270
uint64_t t;
271
272
t = arg_info(op->args[1])->val;
273
- t = sextract64(t, op->args[2], op->args[3]);
274
+ t = sextract64(t, pos, len);
275
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
276
}
277
278
- z_mask_old = arg_info(op->args[1])->z_mask;
279
- z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
280
- if (op->args[2] == 0 && z_mask >= 0) {
281
- ctx->a_mask = z_mask_old ^ z_mask;
282
- }
283
+ z_mask = arg_info(op->args[1])->z_mask;
284
+ z_mask = sextract64(z_mask, pos, len);
285
ctx->z_mask = z_mask;
286
287
+ s_mask_old = arg_info(op->args[1])->s_mask;
288
+ s_mask = sextract64(s_mask_old, pos, len);
289
+ s_mask |= MAKE_64BIT_MASK(len, 64 - len);
290
+ ctx->s_mask = s_mask;
291
+
292
+ if (pos == 0) {
293
+ ctx->a_mask = s_mask & ~s_mask_old;
294
+ }
295
+
296
return fold_masks(ctx, op);
297
}
298
299
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
300
{
301
/* We can't do any folding with a load, but we can record bits. */
302
switch (op->opc) {
303
+ CASE_OP_32_64(ld8s):
304
+ ctx->s_mask = MAKE_64BIT_MASK(8, 56);
305
+ break;
306
CASE_OP_32_64(ld8u):
307
ctx->z_mask = MAKE_64BIT_MASK(0, 8);
308
+ ctx->s_mask = MAKE_64BIT_MASK(9, 55);
309
+ break;
310
+ CASE_OP_32_64(ld16s):
311
+ ctx->s_mask = MAKE_64BIT_MASK(16, 48);
312
break;
668
break;
313
CASE_OP_32_64(ld16u):
669
- case MO_LEUQ:
314
ctx->z_mask = MAKE_64BIT_MASK(0, 16);
670
- helper_le_stq_mmu(env, taddr, val, oi, ra);
315
+ ctx->s_mask = MAKE_64BIT_MASK(17, 47);
671
- break;
316
+ break;
672
- case MO_BEUW:
317
+ case INDEX_op_ld32s_i64:
673
- helper_be_stw_mmu(env, taddr, val, oi, ra);
318
+ ctx->s_mask = MAKE_64BIT_MASK(32, 32);
674
- break;
319
break;
675
- case MO_BEUL:
320
case INDEX_op_ld32u_i64:
676
- helper_be_stl_mmu(env, taddr, val, oi, ra);
321
ctx->z_mask = MAKE_64BIT_MASK(0, 32);
677
- break;
322
+ ctx->s_mask = MAKE_64BIT_MASK(33, 31);
678
- case MO_BEUQ:
679
- helper_be_stq_mmu(env, taddr, val, oi, ra);
680
+ case MO_UQ:
681
+ helper_stq_mmu(env, taddr, val, oi, ra);
323
break;
682
break;
324
default:
683
default:
325
g_assert_not_reached();
684
g_assert_not_reached();
326
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
685
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
327
ctx.type = TCG_TYPE_I32;
686
index XXXXXXX..XXXXXXX 100644
328
}
687
--- a/tcg/aarch64/tcg-target.c.inc
329
688
+++ b/tcg/aarch64/tcg-target.c.inc
330
- /* Assume all bits affected, and no bits known zero. */
689
@@ -XXX,XX +XXX,XX @@ typedef struct {
331
+ /* Assume all bits affected, no bits known zero, no sign reps. */
690
} HostAddress;
332
ctx.a_mask = -1;
691
333
ctx.z_mask = -1;
692
#ifdef CONFIG_SOFTMMU
334
+ ctx.s_mask = 0;
693
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
335
694
- * MemOpIdx oi, uintptr_t ra)
336
/*
695
- */
337
* Process each opcode.
696
-static void * const qemu_ld_helpers[MO_SIZE + 1] = {
338
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
697
- [MO_8] = helper_ret_ldub_mmu,
339
case INDEX_op_extrh_i64_i32:
698
-#if HOST_BIG_ENDIAN
340
done = fold_extu(&ctx, op);
699
- [MO_16] = helper_be_lduw_mmu,
341
break;
700
- [MO_32] = helper_be_ldul_mmu,
342
+ CASE_OP_32_64(ld8s):
701
- [MO_64] = helper_be_ldq_mmu,
343
CASE_OP_32_64(ld8u):
702
-#else
344
+ CASE_OP_32_64(ld16s):
703
- [MO_16] = helper_le_lduw_mmu,
345
CASE_OP_32_64(ld16u):
704
- [MO_32] = helper_le_ldul_mmu,
346
+ case INDEX_op_ld32s_i64:
705
- [MO_64] = helper_le_ldq_mmu,
347
case INDEX_op_ld32u_i64:
706
-#endif
348
done = fold_tcg_ld(&ctx, op);
707
-};
349
break;
708
-
709
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
710
- * uintxx_t val, MemOpIdx oi,
711
- * uintptr_t ra)
712
- */
713
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
714
- [MO_8] = helper_ret_stb_mmu,
715
-#if HOST_BIG_ENDIAN
716
- [MO_16] = helper_be_stw_mmu,
717
- [MO_32] = helper_be_stl_mmu,
718
- [MO_64] = helper_be_stq_mmu,
719
-#else
720
- [MO_16] = helper_le_stw_mmu,
721
- [MO_32] = helper_le_stl_mmu,
722
- [MO_64] = helper_le_stq_mmu,
723
-#endif
724
-};
725
-
726
static const TCGLdstHelperParam ldst_helper_param = {
727
.ntmp = 1, .tmp = { TCG_REG_TMP }
728
};
729
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
730
index XXXXXXX..XXXXXXX 100644
731
--- a/tcg/arm/tcg-target.c.inc
732
+++ b/tcg/arm/tcg-target.c.inc
733
@@ -XXX,XX +XXX,XX @@ typedef struct {
734
} HostAddress;
735
736
#ifdef CONFIG_SOFTMMU
737
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
738
- * int mmu_idx, uintptr_t ra)
739
- */
740
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
741
- [MO_UB] = helper_ret_ldub_mmu,
742
- [MO_SB] = helper_ret_ldsb_mmu,
743
-#if HOST_BIG_ENDIAN
744
- [MO_UW] = helper_be_lduw_mmu,
745
- [MO_UL] = helper_be_ldul_mmu,
746
- [MO_UQ] = helper_be_ldq_mmu,
747
- [MO_SW] = helper_be_ldsw_mmu,
748
- [MO_SL] = helper_be_ldul_mmu,
749
-#else
750
- [MO_UW] = helper_le_lduw_mmu,
751
- [MO_UL] = helper_le_ldul_mmu,
752
- [MO_UQ] = helper_le_ldq_mmu,
753
- [MO_SW] = helper_le_ldsw_mmu,
754
- [MO_SL] = helper_le_ldul_mmu,
755
-#endif
756
-};
757
-
758
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
759
- * uintxx_t val, int mmu_idx, uintptr_t ra)
760
- */
761
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
762
- [MO_8] = helper_ret_stb_mmu,
763
-#if HOST_BIG_ENDIAN
764
- [MO_16] = helper_be_stw_mmu,
765
- [MO_32] = helper_be_stl_mmu,
766
- [MO_64] = helper_be_stq_mmu,
767
-#else
768
- [MO_16] = helper_le_stw_mmu,
769
- [MO_32] = helper_le_stl_mmu,
770
- [MO_64] = helper_le_stq_mmu,
771
-#endif
772
-};
773
-
774
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
775
{
776
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
777
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
778
index XXXXXXX..XXXXXXX 100644
779
--- a/tcg/i386/tcg-target.c.inc
780
+++ b/tcg/i386/tcg-target.c.inc
781
@@ -XXX,XX +XXX,XX @@ typedef struct {
782
} HostAddress;
783
784
#if defined(CONFIG_SOFTMMU)
785
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
786
- * int mmu_idx, uintptr_t ra)
787
- */
788
-static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
789
- [MO_UB] = helper_ret_ldub_mmu,
790
- [MO_LEUW] = helper_le_lduw_mmu,
791
- [MO_LEUL] = helper_le_ldul_mmu,
792
- [MO_LEUQ] = helper_le_ldq_mmu,
793
- [MO_BEUW] = helper_be_lduw_mmu,
794
- [MO_BEUL] = helper_be_ldul_mmu,
795
- [MO_BEUQ] = helper_be_ldq_mmu,
796
-};
797
-
798
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
799
- * uintxx_t val, int mmu_idx, uintptr_t ra)
800
- */
801
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
802
- [MO_UB] = helper_ret_stb_mmu,
803
- [MO_LEUW] = helper_le_stw_mmu,
804
- [MO_LEUL] = helper_le_stl_mmu,
805
- [MO_LEUQ] = helper_le_stq_mmu,
806
- [MO_BEUW] = helper_be_stw_mmu,
807
- [MO_BEUL] = helper_be_stl_mmu,
808
- [MO_BEUQ] = helper_be_stq_mmu,
809
-};
810
-
811
/*
812
* Because i686 has no register parameters and because x86_64 has xchg
813
* to handle addr/data register overlap, we have placed all input arguments
814
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
815
}
816
817
tcg_out_ld_helper_args(s, l, &ldst_helper_param);
818
- tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
819
+ tcg_out_branch(s, 1, qemu_ld_helpers[opc & MO_SIZE]);
820
tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
821
822
tcg_out_jmp(s, l->raddr);
823
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
824
}
825
826
tcg_out_st_helper_args(s, l, &ldst_helper_param);
827
- tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
828
+ tcg_out_branch(s, 1, qemu_st_helpers[opc & MO_SIZE]);
829
830
tcg_out_jmp(s, l->raddr);
831
return true;
832
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
833
index XXXXXXX..XXXXXXX 100644
834
--- a/tcg/loongarch64/tcg-target.c.inc
835
+++ b/tcg/loongarch64/tcg-target.c.inc
836
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
837
*/
838
839
#if defined(CONFIG_SOFTMMU)
840
-/*
841
- * helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
842
- * MemOpIdx oi, uintptr_t ra)
843
- */
844
-static void * const qemu_ld_helpers[4] = {
845
- [MO_8] = helper_ret_ldub_mmu,
846
- [MO_16] = helper_le_lduw_mmu,
847
- [MO_32] = helper_le_ldul_mmu,
848
- [MO_64] = helper_le_ldq_mmu,
849
-};
850
-
851
-/*
852
- * helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
853
- * uintxx_t val, MemOpIdx oi,
854
- * uintptr_t ra)
855
- */
856
-static void * const qemu_st_helpers[4] = {
857
- [MO_8] = helper_ret_stb_mmu,
858
- [MO_16] = helper_le_stw_mmu,
859
- [MO_32] = helper_le_stl_mmu,
860
- [MO_64] = helper_le_stq_mmu,
861
-};
862
-
863
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
864
{
865
tcg_out_opc_b(s, 0);
866
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
867
index XXXXXXX..XXXXXXX 100644
868
--- a/tcg/mips/tcg-target.c.inc
869
+++ b/tcg/mips/tcg-target.c.inc
870
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
871
}
872
873
#if defined(CONFIG_SOFTMMU)
874
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
875
- [MO_UB] = helper_ret_ldub_mmu,
876
- [MO_SB] = helper_ret_ldsb_mmu,
877
-#if HOST_BIG_ENDIAN
878
- [MO_UW] = helper_be_lduw_mmu,
879
- [MO_SW] = helper_be_ldsw_mmu,
880
- [MO_UL] = helper_be_ldul_mmu,
881
- [MO_SL] = helper_be_ldsl_mmu,
882
- [MO_UQ] = helper_be_ldq_mmu,
883
-#else
884
- [MO_UW] = helper_le_lduw_mmu,
885
- [MO_SW] = helper_le_ldsw_mmu,
886
- [MO_UL] = helper_le_ldul_mmu,
887
- [MO_UQ] = helper_le_ldq_mmu,
888
- [MO_SL] = helper_le_ldsl_mmu,
889
-#endif
890
-};
891
-
892
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
893
- [MO_UB] = helper_ret_stb_mmu,
894
-#if HOST_BIG_ENDIAN
895
- [MO_UW] = helper_be_stw_mmu,
896
- [MO_UL] = helper_be_stl_mmu,
897
- [MO_UQ] = helper_be_stq_mmu,
898
-#else
899
- [MO_UW] = helper_le_stw_mmu,
900
- [MO_UL] = helper_le_stl_mmu,
901
- [MO_UQ] = helper_le_stq_mmu,
902
-#endif
903
-};
904
-
905
/* We have four temps, we might as well expose three of them. */
906
static const TCGLdstHelperParam ldst_helper_param = {
907
.ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
908
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
909
index XXXXXXX..XXXXXXX 100644
910
--- a/tcg/ppc/tcg-target.c.inc
911
+++ b/tcg/ppc/tcg-target.c.inc
912
@@ -XXX,XX +XXX,XX @@ static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
913
};
914
915
#if defined (CONFIG_SOFTMMU)
916
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
917
- * int mmu_idx, uintptr_t ra)
918
- */
919
-static void * const qemu_ld_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
920
- [MO_UB] = helper_ret_ldub_mmu,
921
- [MO_LEUW] = helper_le_lduw_mmu,
922
- [MO_LEUL] = helper_le_ldul_mmu,
923
- [MO_LEUQ] = helper_le_ldq_mmu,
924
- [MO_BEUW] = helper_be_lduw_mmu,
925
- [MO_BEUL] = helper_be_ldul_mmu,
926
- [MO_BEUQ] = helper_be_ldq_mmu,
927
-};
928
-
929
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
930
- * uintxx_t val, int mmu_idx, uintptr_t ra)
931
- */
932
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
933
- [MO_UB] = helper_ret_stb_mmu,
934
- [MO_LEUW] = helper_le_stw_mmu,
935
- [MO_LEUL] = helper_le_stl_mmu,
936
- [MO_LEUQ] = helper_le_stq_mmu,
937
- [MO_BEUW] = helper_be_stw_mmu,
938
- [MO_BEUL] = helper_be_stl_mmu,
939
- [MO_BEUQ] = helper_be_stq_mmu,
940
-};
941
-
942
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
943
{
944
if (arg < 0) {
945
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
946
}
947
948
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
949
- tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
950
+ tcg_out_call_int(s, LK, qemu_ld_helpers[opc & MO_SIZE]);
951
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
952
953
tcg_out_b(s, 0, lb->raddr);
954
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
955
}
956
957
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
958
- tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
959
+ tcg_out_call_int(s, LK, qemu_st_helpers[opc & MO_SIZE]);
960
961
tcg_out_b(s, 0, lb->raddr);
962
return true;
963
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
964
index XXXXXXX..XXXXXXX 100644
965
--- a/tcg/riscv/tcg-target.c.inc
966
+++ b/tcg/riscv/tcg-target.c.inc
967
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
968
*/
969
970
#if defined(CONFIG_SOFTMMU)
971
-/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
972
- * MemOpIdx oi, uintptr_t ra)
973
- */
974
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
975
- [MO_UB] = helper_ret_ldub_mmu,
976
- [MO_SB] = helper_ret_ldsb_mmu,
977
-#if HOST_BIG_ENDIAN
978
- [MO_UW] = helper_be_lduw_mmu,
979
- [MO_SW] = helper_be_ldsw_mmu,
980
- [MO_UL] = helper_be_ldul_mmu,
981
-#if TCG_TARGET_REG_BITS == 64
982
- [MO_SL] = helper_be_ldsl_mmu,
983
-#endif
984
- [MO_UQ] = helper_be_ldq_mmu,
985
-#else
986
- [MO_UW] = helper_le_lduw_mmu,
987
- [MO_SW] = helper_le_ldsw_mmu,
988
- [MO_UL] = helper_le_ldul_mmu,
989
-#if TCG_TARGET_REG_BITS == 64
990
- [MO_SL] = helper_le_ldsl_mmu,
991
-#endif
992
- [MO_UQ] = helper_le_ldq_mmu,
993
-#endif
994
-};
995
-
996
-/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
997
- * uintxx_t val, MemOpIdx oi,
998
- * uintptr_t ra)
999
- */
1000
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
1001
- [MO_8] = helper_ret_stb_mmu,
1002
-#if HOST_BIG_ENDIAN
1003
- [MO_16] = helper_be_stw_mmu,
1004
- [MO_32] = helper_be_stl_mmu,
1005
- [MO_64] = helper_be_stq_mmu,
1006
-#else
1007
- [MO_16] = helper_le_stw_mmu,
1008
- [MO_32] = helper_le_stl_mmu,
1009
- [MO_64] = helper_le_stq_mmu,
1010
-#endif
1011
-};
1012
-
1013
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
1014
{
1015
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
1016
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
1017
index XXXXXXX..XXXXXXX 100644
1018
--- a/tcg/s390x/tcg-target.c.inc
1019
+++ b/tcg/s390x/tcg-target.c.inc
1020
@@ -XXX,XX +XXX,XX @@ static const uint8_t tcg_cond_to_ltr_cond[] = {
1021
[TCG_COND_GEU] = S390_CC_ALWAYS,
1022
};
1023
1024
-#ifdef CONFIG_SOFTMMU
1025
-static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
1026
- [MO_UB] = helper_ret_ldub_mmu,
1027
- [MO_SB] = helper_ret_ldsb_mmu,
1028
- [MO_LEUW] = helper_le_lduw_mmu,
1029
- [MO_LESW] = helper_le_ldsw_mmu,
1030
- [MO_LEUL] = helper_le_ldul_mmu,
1031
- [MO_LESL] = helper_le_ldsl_mmu,
1032
- [MO_LEUQ] = helper_le_ldq_mmu,
1033
- [MO_BEUW] = helper_be_lduw_mmu,
1034
- [MO_BESW] = helper_be_ldsw_mmu,
1035
- [MO_BEUL] = helper_be_ldul_mmu,
1036
- [MO_BESL] = helper_be_ldsl_mmu,
1037
- [MO_BEUQ] = helper_be_ldq_mmu,
1038
-};
1039
-
1040
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
1041
- [MO_UB] = helper_ret_stb_mmu,
1042
- [MO_LEUW] = helper_le_stw_mmu,
1043
- [MO_LEUL] = helper_le_stl_mmu,
1044
- [MO_LEUQ] = helper_le_stq_mmu,
1045
- [MO_BEUW] = helper_be_stw_mmu,
1046
- [MO_BEUL] = helper_be_stl_mmu,
1047
- [MO_BEUQ] = helper_be_stq_mmu,
1048
-};
1049
-#endif
1050
-
1051
static const tcg_insn_unit *tb_ret_addr;
1052
uint64_t s390_facilities[3];
1053
1054
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1055
}
1056
1057
tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
1058
- tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1059
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
1060
tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
1061
1062
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1063
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1064
}
1065
1066
tcg_out_st_helper_args(s, lb, &ldst_helper_param);
1067
- tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1068
+ tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
1069
1070
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1071
return true;
1072
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
1073
index XXXXXXX..XXXXXXX 100644
1074
--- a/tcg/sparc64/tcg-target.c.inc
1075
+++ b/tcg/sparc64/tcg-target.c.inc
1076
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
1077
}
1078
1079
#ifdef CONFIG_SOFTMMU
1080
-static const tcg_insn_unit *qemu_ld_trampoline[(MO_SSIZE | MO_BSWAP) + 1];
1081
-static const tcg_insn_unit *qemu_st_trampoline[(MO_SIZE | MO_BSWAP) + 1];
1082
+static const tcg_insn_unit *qemu_ld_trampoline[MO_SSIZE + 1];
1083
+static const tcg_insn_unit *qemu_st_trampoline[MO_SIZE + 1];
1084
1085
static void build_trampolines(TCGContext *s)
1086
{
1087
- static void * const qemu_ld_helpers[] = {
1088
- [MO_UB] = helper_ret_ldub_mmu,
1089
- [MO_SB] = helper_ret_ldsb_mmu,
1090
- [MO_LEUW] = helper_le_lduw_mmu,
1091
- [MO_LESW] = helper_le_ldsw_mmu,
1092
- [MO_LEUL] = helper_le_ldul_mmu,
1093
- [MO_LEUQ] = helper_le_ldq_mmu,
1094
- [MO_BEUW] = helper_be_lduw_mmu,
1095
- [MO_BESW] = helper_be_ldsw_mmu,
1096
- [MO_BEUL] = helper_be_ldul_mmu,
1097
- [MO_BEUQ] = helper_be_ldq_mmu,
1098
- };
1099
- static void * const qemu_st_helpers[] = {
1100
- [MO_UB] = helper_ret_stb_mmu,
1101
- [MO_LEUW] = helper_le_stw_mmu,
1102
- [MO_LEUL] = helper_le_stl_mmu,
1103
- [MO_LEUQ] = helper_le_stq_mmu,
1104
- [MO_BEUW] = helper_be_stw_mmu,
1105
- [MO_BEUL] = helper_be_stl_mmu,
1106
- [MO_BEUQ] = helper_be_stq_mmu,
1107
- };
1108
-
1109
int i;
1110
1111
for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
1112
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
1113
/* We use the helpers to extend SB and SW data, leaving the case
1114
of SL needing explicit extending below. */
1115
if ((memop & MO_SSIZE) == MO_SL) {
1116
- func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1117
+ func = qemu_ld_trampoline[MO_UL];
1118
} else {
1119
- func = qemu_ld_trampoline[memop & (MO_BSWAP | MO_SSIZE)];
1120
+ func = qemu_ld_trampoline[memop & MO_SSIZE];
1121
}
1122
tcg_debug_assert(func != NULL);
1123
tcg_out_call_nodelay(s, func, false);
1124
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
1125
tcg_out_movext(s, (memop & MO_SIZE) == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
1126
TCG_REG_O2, data_type, memop & MO_SIZE, data);
1127
1128
- func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
1129
+ func = qemu_st_trampoline[memop & MO_SIZE];
1130
tcg_debug_assert(func != NULL);
1131
tcg_out_call_nodelay(s, func, false);
1132
/* delay slot */
--
2.25.1

--
2.34.1
New patch
1
TCG backends may need to defer to a helper to implement
the atomicity required by a given operation. Mirror the
interface used in system mode.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
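A minimal illustration of the helper shape this introduces, using invented
demo_* names and a stand-in DEMO_BSWAP flag rather than QEMU's MemOp
encoding: one host-endian access, atomic as the operation requires, then a
byte swap when the requested order differs from the host.

    /* Illustration only; demo_* names and DEMO_BSWAP are invented here. */
    #include <stdint.h>

    enum { DEMO_BSWAP = 1 };               /* stand-in for MO_BSWAP */

    static uint16_t demo_bswap16(uint16_t x)
    {
        return (uint16_t)((x << 8) | (x >> 8));
    }

    /* One host-endian access, atomic for a naturally aligned uint16_t... */
    static uint16_t demo_ld2_he(const uint16_t *host_addr)
    {
        return __atomic_load_n(host_addr, __ATOMIC_RELAXED);
    }

    /* ...then adjust for the byte order the memory operation requested. */
    static uint16_t demo_lduw(const uint16_t *host_addr, unsigned memop)
    {
        uint16_t ret = demo_ld2_he(host_addr);
        return (memop & DEMO_BSWAP) ? demo_bswap16(ret) : ret;
    }

With that shape available in user-only builds as well, the backends can
share a single table of size-indexed helpers across both build modes.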
8
include/tcg/tcg-ldst.h | 6 +-
9
accel/tcg/user-exec.c | 393 ++++++++++++++++++++++++++++-------------
10
tcg/tcg.c | 6 +-
11
3 files changed, 278 insertions(+), 127 deletions(-)
12
13
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/tcg/tcg-ldst.h
16
+++ b/include/tcg/tcg-ldst.h
17
@@ -XXX,XX +XXX,XX @@
18
#ifndef TCG_LDST_H
19
#define TCG_LDST_H
20
21
-#ifdef CONFIG_SOFTMMU
22
-
23
/* Value zero-extended to tcg register size. */
24
tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
25
MemOpIdx oi, uintptr_t retaddr);
26
@@ -XXX,XX +XXX,XX @@ void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
27
void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
28
MemOpIdx oi, uintptr_t retaddr);
29
30
-#else
31
+#ifdef CONFIG_USER_ONLY
32
33
G_NORETURN void helper_unaligned_ld(CPUArchState *env, target_ulong addr);
34
G_NORETURN void helper_unaligned_st(CPUArchState *env, target_ulong addr);
35
36
-#endif /* CONFIG_SOFTMMU */
37
+#endif /* CONFIG_USER_ONLY */
38
#endif /* TCG_LDST_H */
39
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
40
index XXXXXXX..XXXXXXX 100644
41
--- a/accel/tcg/user-exec.c
42
+++ b/accel/tcg/user-exec.c
43
@@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong last) { }
44
45
/* The softmmu versions of these helpers are in cputlb.c. */
46
47
-/*
48
- * Verify that we have passed the correct MemOp to the correct function.
49
- *
50
- * We could present one function to target code, and dispatch based on
51
- * the MemOp, but so far we have worked hard to avoid an indirect function
52
- * call along the memory path.
53
- */
54
-static void validate_memop(MemOpIdx oi, MemOp expected)
55
-{
56
-#ifdef CONFIG_DEBUG_TCG
57
- MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP);
58
- assert(have == expected);
59
-#endif
60
-}
61
-
62
void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
63
{
64
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
65
@@ -XXX,XX +XXX,XX @@ void helper_unaligned_st(CPUArchState *env, target_ulong addr)
66
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
67
}
68
69
-static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
70
- MemOpIdx oi, uintptr_t ra, MMUAccessType type)
71
+static void *cpu_mmu_lookup(CPUArchState *env, abi_ptr addr,
72
+ MemOp mop, uintptr_t ra, MMUAccessType type)
73
{
74
- MemOp mop = get_memop(oi);
75
int a_bits = get_alignment_bits(mop);
76
void *ret;
77
78
@@ -XXX,XX +XXX,XX @@ static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
79
80
#include "ldst_atomicity.c.inc"
81
82
-uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
83
- MemOpIdx oi, uintptr_t ra)
84
+static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
85
+ MemOp mop, uintptr_t ra)
86
{
87
void *haddr;
88
uint8_t ret;
89
90
- validate_memop(oi, MO_UB);
91
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
92
+ tcg_debug_assert((mop & MO_SIZE) == MO_8);
93
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
94
ret = ldub_p(haddr);
95
clear_helper_retaddr();
96
+ return ret;
97
+}
98
+
99
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
100
+ MemOpIdx oi, uintptr_t ra)
101
+{
102
+ return do_ld1_mmu(env, addr, get_memop(oi), ra);
103
+}
104
+
105
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
106
+ MemOpIdx oi, uintptr_t ra)
107
+{
108
+ return (int8_t)do_ld1_mmu(env, addr, get_memop(oi), ra);
109
+}
110
+
111
+uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
112
+ MemOpIdx oi, uintptr_t ra)
113
+{
114
+ uint8_t ret = do_ld1_mmu(env, addr, get_memop(oi), ra);
115
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
116
return ret;
117
}
118
119
+static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
120
+ MemOp mop, uintptr_t ra)
121
+{
122
+ void *haddr;
123
+ uint16_t ret;
124
+
125
+ tcg_debug_assert((mop & MO_SIZE) == MO_16);
126
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
127
+ ret = load_atom_2(env, ra, haddr, mop);
128
+ clear_helper_retaddr();
129
+ return ret;
130
+}
131
+
132
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
133
+ MemOpIdx oi, uintptr_t ra)
134
+{
135
+ MemOp mop = get_memop(oi);
136
+ uint16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
137
+
138
+ if (mop & MO_BSWAP) {
139
+ ret = bswap16(ret);
140
+ }
141
+ return ret;
142
+}
143
+
144
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
145
+ MemOpIdx oi, uintptr_t ra)
146
+{
147
+ MemOp mop = get_memop(oi);
148
+ int16_t ret = do_ld2_he_mmu(env, addr, mop, ra);
149
+
150
+ if (mop & MO_BSWAP) {
151
+ ret = bswap16(ret);
152
+ }
153
+ return ret;
154
+}
155
+
156
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
157
MemOpIdx oi, uintptr_t ra)
158
{
159
- void *haddr;
160
+ MemOp mop = get_memop(oi);
161
uint16_t ret;
162
163
- validate_memop(oi, MO_BEUW);
164
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
165
- ret = load_atom_2(env, ra, haddr, get_memop(oi));
166
- clear_helper_retaddr();
167
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
168
+ ret = do_ld2_he_mmu(env, addr, mop, ra);
169
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
170
return cpu_to_be16(ret);
171
}
172
173
-uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
174
- MemOpIdx oi, uintptr_t ra)
175
-{
176
- void *haddr;
177
- uint32_t ret;
178
-
179
- validate_memop(oi, MO_BEUL);
180
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
181
- ret = load_atom_4(env, ra, haddr, get_memop(oi));
182
- clear_helper_retaddr();
183
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
184
- return cpu_to_be32(ret);
185
-}
186
-
187
-uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
188
- MemOpIdx oi, uintptr_t ra)
189
-{
190
- void *haddr;
191
- uint64_t ret;
192
-
193
- validate_memop(oi, MO_BEUQ);
194
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
195
- ret = load_atom_8(env, ra, haddr, get_memop(oi));
196
- clear_helper_retaddr();
197
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
198
- return cpu_to_be64(ret);
199
-}
200
-
201
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
202
MemOpIdx oi, uintptr_t ra)
203
{
204
- void *haddr;
205
+ MemOp mop = get_memop(oi);
206
uint16_t ret;
207
208
- validate_memop(oi, MO_LEUW);
209
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
210
- ret = load_atom_2(env, ra, haddr, get_memop(oi));
211
- clear_helper_retaddr();
212
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
213
+ ret = do_ld2_he_mmu(env, addr, mop, ra);
214
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
215
return cpu_to_le16(ret);
216
}
217
218
+static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
219
+ MemOp mop, uintptr_t ra)
220
+{
221
+ void *haddr;
222
+ uint32_t ret;
223
+
224
+ tcg_debug_assert((mop & MO_SIZE) == MO_32);
225
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
226
+ ret = load_atom_4(env, ra, haddr, mop);
227
+ clear_helper_retaddr();
228
+ return ret;
229
+}
230
+
231
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
232
+ MemOpIdx oi, uintptr_t ra)
233
+{
234
+ MemOp mop = get_memop(oi);
235
+ uint32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
236
+
237
+ if (mop & MO_BSWAP) {
238
+ ret = bswap32(ret);
239
+ }
240
+ return ret;
241
+}
242
+
243
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
244
+ MemOpIdx oi, uintptr_t ra)
245
+{
246
+ MemOp mop = get_memop(oi);
247
+ int32_t ret = do_ld4_he_mmu(env, addr, mop, ra);
248
+
249
+ if (mop & MO_BSWAP) {
250
+ ret = bswap32(ret);
251
+ }
252
+ return ret;
253
+}
254
+
255
+uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
256
+ MemOpIdx oi, uintptr_t ra)
257
+{
258
+ MemOp mop = get_memop(oi);
259
+ uint32_t ret;
260
+
261
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
262
+ ret = do_ld4_he_mmu(env, addr, mop, ra);
263
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
264
+ return cpu_to_be32(ret);
265
+}
266
+
267
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
268
MemOpIdx oi, uintptr_t ra)
269
{
270
- void *haddr;
271
+ MemOp mop = get_memop(oi);
272
uint32_t ret;
273
274
- validate_memop(oi, MO_LEUL);
275
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
276
- ret = load_atom_4(env, ra, haddr, get_memop(oi));
277
- clear_helper_retaddr();
278
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
279
+ ret = do_ld4_he_mmu(env, addr, mop, ra);
280
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
281
return cpu_to_le32(ret);
282
}
283
284
+static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
285
+ MemOp mop, uintptr_t ra)
286
+{
287
+ void *haddr;
288
+ uint64_t ret;
289
+
290
+ tcg_debug_assert((mop & MO_SIZE) == MO_64);
291
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
292
+ ret = load_atom_8(env, ra, haddr, mop);
293
+ clear_helper_retaddr();
294
+ return ret;
295
+}
296
+
297
+uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
298
+ MemOpIdx oi, uintptr_t ra)
299
+{
300
+ MemOp mop = get_memop(oi);
301
+ uint64_t ret = do_ld8_he_mmu(env, addr, mop, ra);
302
+
303
+ if (mop & MO_BSWAP) {
304
+ ret = bswap64(ret);
305
+ }
306
+ return ret;
307
+}
308
+
309
+uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
310
+ MemOpIdx oi, uintptr_t ra)
311
+{
312
+ MemOp mop = get_memop(oi);
313
+ uint64_t ret;
314
+
315
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
316
+ ret = do_ld8_he_mmu(env, addr, mop, ra);
317
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
318
+ return cpu_to_be64(ret);
319
+}
320
+
321
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
322
MemOpIdx oi, uintptr_t ra)
323
{
324
- void *haddr;
325
+ MemOp mop = get_memop(oi);
326
uint64_t ret;
327
328
- validate_memop(oi, MO_LEUQ);
329
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
330
- ret = load_atom_8(env, ra, haddr, get_memop(oi));
331
- clear_helper_retaddr();
332
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
333
+ ret = do_ld8_he_mmu(env, addr, mop, ra);
334
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
335
return cpu_to_le64(ret);
336
}
337
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
338
void *haddr;
339
Int128 ret;
340
341
- validate_memop(oi, MO_128 | MO_BE);
342
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
343
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
344
memcpy(&ret, haddr, 16);
345
clear_helper_retaddr();
346
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
347
void *haddr;
348
Int128 ret;
349
350
- validate_memop(oi, MO_128 | MO_LE);
351
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
352
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
353
memcpy(&ret, haddr, 16);
354
clear_helper_retaddr();
355
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
356
return ret;
357
}
358
359
-void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
360
- MemOpIdx oi, uintptr_t ra)
361
+static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
362
+ MemOp mop, uintptr_t ra)
363
{
364
void *haddr;
365
366
- validate_memop(oi, MO_UB);
367
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
368
+ tcg_debug_assert((mop & MO_SIZE) == MO_8);
369
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
370
stb_p(haddr, val);
371
clear_helper_retaddr();
372
+}
373
+
374
+void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
375
+ MemOpIdx oi, uintptr_t ra)
376
+{
377
+ do_st1_mmu(env, addr, val, get_memop(oi), ra);
378
+}
379
+
380
+void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
381
+ MemOpIdx oi, uintptr_t ra)
382
+{
383
+ do_st1_mmu(env, addr, val, get_memop(oi), ra);
384
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
385
}
386
387
+static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
388
+ MemOp mop, uintptr_t ra)
389
+{
390
+ void *haddr;
391
+
392
+ tcg_debug_assert((mop & MO_SIZE) == MO_16);
393
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
394
+ store_atom_2(env, ra, haddr, mop, val);
395
+ clear_helper_retaddr();
396
+}
397
+
398
+void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
399
+ MemOpIdx oi, uintptr_t ra)
400
+{
401
+ MemOp mop = get_memop(oi);
402
+
403
+ if (mop & MO_BSWAP) {
404
+ val = bswap16(val);
405
+ }
406
+ do_st2_he_mmu(env, addr, val, mop, ra);
407
+}
408
+
409
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
410
MemOpIdx oi, uintptr_t ra)
411
{
412
- void *haddr;
413
+ MemOp mop = get_memop(oi);
414
415
- validate_memop(oi, MO_BEUW);
416
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
417
- store_atom_2(env, ra, haddr, get_memop(oi), be16_to_cpu(val));
418
- clear_helper_retaddr();
419
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
420
-}
421
-
422
-void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
423
- MemOpIdx oi, uintptr_t ra)
424
-{
425
- void *haddr;
426
-
427
- validate_memop(oi, MO_BEUL);
428
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
429
- store_atom_4(env, ra, haddr, get_memop(oi), be32_to_cpu(val));
430
- clear_helper_retaddr();
431
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
432
-}
433
-
434
-void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
435
- MemOpIdx oi, uintptr_t ra)
436
-{
437
- void *haddr;
438
-
439
- validate_memop(oi, MO_BEUQ);
440
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
441
- store_atom_8(env, ra, haddr, get_memop(oi), be64_to_cpu(val));
442
- clear_helper_retaddr();
443
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
444
+ do_st2_he_mmu(env, addr, be16_to_cpu(val), mop, ra);
445
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
446
}
447
448
void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
449
MemOpIdx oi, uintptr_t ra)
450
+{
451
+ MemOp mop = get_memop(oi);
452
+
453
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
454
+ do_st2_he_mmu(env, addr, le16_to_cpu(val), mop, ra);
455
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
456
+}
457
+
458
+static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
459
+ MemOp mop, uintptr_t ra)
460
{
461
void *haddr;
462
463
- validate_memop(oi, MO_LEUW);
464
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
465
- store_atom_2(env, ra, haddr, get_memop(oi), le16_to_cpu(val));
466
+ tcg_debug_assert((mop & MO_SIZE) == MO_32);
467
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
468
+ store_atom_4(env, ra, haddr, mop, val);
469
clear_helper_retaddr();
470
+}
471
+
472
+void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
473
+ MemOpIdx oi, uintptr_t ra)
474
+{
475
+ MemOp mop = get_memop(oi);
476
+
477
+ if (mop & MO_BSWAP) {
478
+ val = bswap32(val);
479
+ }
480
+ do_st4_he_mmu(env, addr, val, mop, ra);
481
+}
482
+
483
+void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
484
+ MemOpIdx oi, uintptr_t ra)
485
+{
486
+ MemOp mop = get_memop(oi);
487
+
488
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
489
+ do_st4_he_mmu(env, addr, be32_to_cpu(val), mop, ra);
490
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
491
}
492
493
void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
494
MemOpIdx oi, uintptr_t ra)
495
+{
496
+ MemOp mop = get_memop(oi);
497
+
498
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
499
+ do_st4_he_mmu(env, addr, le32_to_cpu(val), mop, ra);
500
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
501
+}
502
+
503
+static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
504
+ MemOp mop, uintptr_t ra)
505
{
506
void *haddr;
507
508
- validate_memop(oi, MO_LEUL);
509
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
510
- store_atom_4(env, ra, haddr, get_memop(oi), le32_to_cpu(val));
511
+ tcg_debug_assert((mop & MO_SIZE) == MO_64);
512
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
513
+ store_atom_8(env, ra, haddr, mop, val);
514
clear_helper_retaddr();
515
+}
516
+
517
+void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
518
+ MemOpIdx oi, uintptr_t ra)
519
+{
520
+ MemOp mop = get_memop(oi);
521
+
522
+ if (mop & MO_BSWAP) {
523
+ val = bswap64(val);
524
+ }
525
+ do_st8_he_mmu(env, addr, val, mop, ra);
526
+}
527
+
528
+void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
529
+ MemOpIdx oi, uintptr_t ra)
530
+{
531
+ MemOp mop = get_memop(oi);
532
+
533
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
534
+ do_st8_he_mmu(env, addr, cpu_to_be64(val), mop, ra);
535
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
536
}
537
538
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
539
MemOpIdx oi, uintptr_t ra)
540
{
541
- void *haddr;
542
+ MemOp mop = get_memop(oi);
543
544
- validate_memop(oi, MO_LEUQ);
545
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
546
- store_atom_8(env, ra, haddr, get_memop(oi), le64_to_cpu(val));
547
- clear_helper_retaddr();
548
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
549
+ do_st8_he_mmu(env, addr, cpu_to_le64(val), mop, ra);
550
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
551
}
552
553
@@ -XXX,XX +XXX,XX @@ void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
554
{
555
void *haddr;
556
557
- validate_memop(oi, MO_128 | MO_BE);
558
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
559
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
560
if (!HOST_BIG_ENDIAN) {
561
val = bswap128(val);
562
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
563
{
564
void *haddr;
565
566
- validate_memop(oi, MO_128 | MO_LE);
567
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
568
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
569
if (HOST_BIG_ENDIAN) {
570
val = bswap128(val);
571
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
572
void *haddr;
573
uint64_t ret;
574
575
- validate_memop(oi, MO_BEUQ);
576
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
577
ret = ldq_p(haddr);
578
clear_helper_retaddr();
579
diff --git a/tcg/tcg.c b/tcg/tcg.c
580
index XXXXXXX..XXXXXXX 100644
581
--- a/tcg/tcg.c
582
+++ b/tcg/tcg.c
583
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
584
const TCGLdstHelperParam *p)
585
__attribute__((unused));
586
587
-#ifdef CONFIG_SOFTMMU
588
-static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
589
+static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
590
[MO_UB] = helper_ldub_mmu,
591
[MO_SB] = helper_ldsb_mmu,
592
[MO_UW] = helper_lduw_mmu,
593
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
594
#endif
595
};
596
597
-static void * const qemu_st_helpers[MO_SIZE + 1] = {
598
+static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
599
[MO_8] = helper_stb_mmu,
600
[MO_16] = helper_stw_mmu,
601
[MO_32] = helper_stl_mmu,
602
[MO_64] = helper_stq_mmu,
603
};
604
-#endif
605
606
TCGContext tcg_init_ctx;
607
__thread TCGContext *tcg_ctx;
608
--
609
2.34.1
New patch
1
We can now fold these two pieces of code.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/tci.c | 89 -------------------------------------------------------
7
1 file changed, 89 deletions(-)
8
9
diff --git a/tcg/tci.c b/tcg/tci.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/tci.c
12
+++ b/tcg/tci.c
13
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
14
MemOp mop = get_memop(oi);
15
uintptr_t ra = (uintptr_t)tb_ptr;
16
17
-#ifdef CONFIG_SOFTMMU
18
switch (mop & MO_SSIZE) {
19
case MO_UB:
20
return helper_ldub_mmu(env, taddr, oi, ra);
21
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
22
default:
23
g_assert_not_reached();
24
}
25
-#else
26
- void *haddr = g2h(env_cpu(env), taddr);
27
- unsigned a_mask = (1u << get_alignment_bits(mop)) - 1;
28
- uint64_t ret;
29
-
30
- set_helper_retaddr(ra);
31
- if (taddr & a_mask) {
32
- helper_unaligned_ld(env, taddr);
33
- }
34
- switch (mop & (MO_BSWAP | MO_SSIZE)) {
35
- case MO_UB:
36
- ret = ldub_p(haddr);
37
- break;
38
- case MO_SB:
39
- ret = ldsb_p(haddr);
40
- break;
41
- case MO_LEUW:
42
- ret = lduw_le_p(haddr);
43
- break;
44
- case MO_LESW:
45
- ret = ldsw_le_p(haddr);
46
- break;
47
- case MO_LEUL:
48
- ret = (uint32_t)ldl_le_p(haddr);
49
- break;
50
- case MO_LESL:
51
- ret = (int32_t)ldl_le_p(haddr);
52
- break;
53
- case MO_LEUQ:
54
- ret = ldq_le_p(haddr);
55
- break;
56
- case MO_BEUW:
57
- ret = lduw_be_p(haddr);
58
- break;
59
- case MO_BESW:
60
- ret = ldsw_be_p(haddr);
61
- break;
62
- case MO_BEUL:
63
- ret = (uint32_t)ldl_be_p(haddr);
64
- break;
65
- case MO_BESL:
66
- ret = (int32_t)ldl_be_p(haddr);
67
- break;
68
- case MO_BEUQ:
69
- ret = ldq_be_p(haddr);
70
- break;
71
- default:
72
- g_assert_not_reached();
73
- }
74
- clear_helper_retaddr();
75
- return ret;
76
-#endif
77
}
78
79
static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
80
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
81
MemOp mop = get_memop(oi);
82
uintptr_t ra = (uintptr_t)tb_ptr;
83
84
-#ifdef CONFIG_SOFTMMU
85
switch (mop & MO_SIZE) {
86
case MO_UB:
87
helper_stb_mmu(env, taddr, val, oi, ra);
88
@@ -XXX,XX +XXX,XX @@ static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
89
default:
90
g_assert_not_reached();
91
}
92
-#else
93
- void *haddr = g2h(env_cpu(env), taddr);
94
- unsigned a_mask = (1u << get_alignment_bits(mop)) - 1;
95
-
96
- set_helper_retaddr(ra);
97
- if (taddr & a_mask) {
98
- helper_unaligned_st(env, taddr);
99
- }
100
- switch (mop & (MO_BSWAP | MO_SIZE)) {
101
- case MO_UB:
102
- stb_p(haddr, val);
103
- break;
104
- case MO_LEUW:
105
- stw_le_p(haddr, val);
106
- break;
107
- case MO_LEUL:
108
- stl_le_p(haddr, val);
109
- break;
110
- case MO_LEUQ:
111
- stq_le_p(haddr, val);
112
- break;
113
- case MO_BEUW:
114
- stw_be_p(haddr, val);
115
- break;
116
- case MO_BEUL:
117
- stl_be_p(haddr, val);
118
- break;
119
- case MO_BEUQ:
120
- stq_be_p(haddr, val);
121
- break;
122
- default:
123
- g_assert_not_reached();
124
- }
125
- clear_helper_retaddr();
126
-#endif
127
}
128
129
#if TCG_TARGET_REG_BITS == 64
130
--
131
2.34.1
1
Split out the conditional conversion from a more complex logical
operation to a simple NOT. Create a couple more helpers to make
this easy for the outer-most logical operations.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
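For context, the conversion described above relies on identities like the
following (a standalone sketch with plain integers, not the tcg/optimize.c
code; the TCG andc/orc/eqv/nand/nor semantics are spelled out in the
comments):

    /* Standalone sketch: each op with the given constant is a NOT. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t x = 0x1234abcd5678ef00ull;

        assert((x ^ ~0ull) == ~x);      /* xor  x, -1          -> not x */
        assert(~(x ^ 0ull) == ~x);      /* eqv  x,  0 (xnor)   -> not x */
        assert(~(x & ~0ull) == ~x);     /* nand x, -1          -> not x */
        assert(~(x | 0ull) == ~x);      /* nor  x,  0          -> not x */
        assert((~0ull & ~x) == ~x);     /* andc -1, x (a & ~b) -> not x */
        assert((0ull | ~x) == ~x);      /* orc   0, x (a | ~b) -> not x */
        return 0;
    }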
4
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
tcg/optimize.c | 158 +++++++++++++++++++++++++++----------------------
4
accel/tcg/tcg-runtime.h | 3 +
9
1 file changed, 86 insertions(+), 72 deletions(-)
5
include/tcg/tcg-ldst.h | 4 +
6
accel/tcg/cputlb.c | 399 +++++++++++++++++++++++++--------
7
accel/tcg/user-exec.c | 94 ++++++--
8
tcg/tcg-op.c | 173 +++++++++-----
9
accel/tcg/ldst_atomicity.c.inc | 184 +++++++++++++++
10
6 files changed, 679 insertions(+), 178 deletions(-)
10
11
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
--- a/accel/tcg/tcg-runtime.h
14
+++ b/tcg/optimize.c
15
+++ b/accel/tcg/tcg-runtime.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
16
return false;
17
DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
17
}
18
#endif /* IN_HELPER_PROTO */
19
20
+DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, tl, i32)
21
+DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, tl, i128, i32)
22
+
23
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
24
i32, env, tl, i32, i32, i32)
25
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
26
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
27
index XXXXXXX..XXXXXXX 100644
28
--- a/include/tcg/tcg-ldst.h
29
+++ b/include/tcg/tcg-ldst.h
30
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
31
MemOpIdx oi, uintptr_t retaddr);
32
uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
33
MemOpIdx oi, uintptr_t retaddr);
34
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
35
+ MemOpIdx oi, uintptr_t retaddr);
36
37
/* Value sign-extended to tcg register size. */
38
tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
39
@@ -XXX,XX +XXX,XX @@ void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
40
MemOpIdx oi, uintptr_t retaddr);
41
void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
42
MemOpIdx oi, uintptr_t retaddr);
43
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
44
+ MemOpIdx oi, uintptr_t retaddr);
45
46
#ifdef CONFIG_USER_ONLY
47
48
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/accel/tcg/cputlb.c
51
+++ b/accel/tcg/cputlb.c
52
@@ -XXX,XX +XXX,XX @@
53
#include "qemu/plugin-memory.h"
54
#endif
55
#include "tcg/tcg-ldst.h"
56
+#include "exec/helper-proto.h"
57
58
/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
59
/* #define DEBUG_TLB */
60
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_whole_be8(CPUArchState *env, uintptr_t ra,
61
return (ret_be << (p->size * 8)) | x;
62
}
63
64
+/**
65
+ * do_ld_whole_be16
66
+ * @p: translation parameters
67
+ * @ret_be: accumulated data
68
+ *
69
+ * As do_ld_bytes_beN, but with one atomic load.
70
+ * 16 aligned bytes are guaranteed to cover the load.
71
+ */
72
+static Int128 do_ld_whole_be16(CPUArchState *env, uintptr_t ra,
73
+ MMULookupPageData *p, uint64_t ret_be)
74
+{
75
+ int o = p->addr & 15;
76
+ Int128 x, y = load_atomic16_or_exit(env, ra, p->haddr - o);
77
+ int size = p->size;
78
+
79
+ if (!HOST_BIG_ENDIAN) {
80
+ y = bswap128(y);
81
+ }
82
+ y = int128_lshift(y, o * 8);
83
+ y = int128_urshift(y, (16 - size) * 8);
84
+ x = int128_make64(ret_be);
85
+ x = int128_lshift(x, size * 8);
86
+ return int128_or(x, y);
87
+}
88
+
89
/*
90
* Wrapper for the above.
91
*/
92
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
93
}
94
}
18
95
19
+/*
96
+/*
20
+ * Convert @op to NOT, if NOT is supported by the host.
97
+ * Wrapper for the above, for 8 < size < 16.
21
+ * Return true if the conversion is successful, which will still
22
+ * indicate that the processing is complete.
23
+ */
98
+ */
24
+static bool fold_not(OptContext *ctx, TCGOp *op);
99
+static Int128 do_ld16_beN(CPUArchState *env, MMULookupPageData *p,
25
+static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
100
+ uint64_t a, int mmu_idx, MemOp mop, uintptr_t ra)
26
+{
101
+{
27
+ TCGOpcode not_op;
102
+ int size = p->size;
28
+ bool have_not;
103
+ uint64_t b;
29
+
104
+ MemOp atom;
30
+ switch (ctx->type) {
105
+
31
+ case TCG_TYPE_I32:
106
+ if (unlikely(p->flags & TLB_MMIO)) {
32
+ not_op = INDEX_op_not_i32;
107
+ p->size = size - 8;
33
+ have_not = TCG_TARGET_HAS_not_i32;
108
+ a = do_ld_mmio_beN(env, p, a, mmu_idx, MMU_DATA_LOAD, ra);
109
+ p->addr += p->size;
110
+ p->size = 8;
111
+ b = do_ld_mmio_beN(env, p, 0, mmu_idx, MMU_DATA_LOAD, ra);
112
+ return int128_make128(b, a);
113
+ }
114
+
115
+ /*
116
+ * It is a given that we cross a page and therefore there is no
117
+ * atomicity for the load as a whole, but subobjects may need attention.
118
+ */
119
+ atom = mop & MO_ATOM_MASK;
120
+ switch (atom) {
121
+ case MO_ATOM_SUBALIGN:
122
+ p->size = size - 8;
123
+ a = do_ld_parts_beN(p, a);
124
+ p->haddr += size - 8;
125
+ p->size = 8;
126
+ b = do_ld_parts_beN(p, 0);
34
+ break;
127
+ break;
35
+ case TCG_TYPE_I64:
128
+
36
+ not_op = INDEX_op_not_i64;
129
+ case MO_ATOM_WITHIN16_PAIR:
37
+ have_not = TCG_TARGET_HAS_not_i64;
130
+ /* Since size > 8, this is the half that must be atomic. */
131
+ return do_ld_whole_be16(env, ra, p, a);
132
+
133
+ case MO_ATOM_IFALIGN_PAIR:
134
+ /*
135
+ * Since size > 8, both halves are misaligned,
136
+ * and so neither is atomic.
137
+ */
138
+ case MO_ATOM_IFALIGN:
139
+ case MO_ATOM_WITHIN16:
140
+ case MO_ATOM_NONE:
141
+ p->size = size - 8;
142
+ a = do_ld_bytes_beN(p, a);
143
+ b = ldq_be_p(p->haddr + size - 8);
38
+ break;
144
+ break;
39
+ case TCG_TYPE_V64:
145
+
40
+ case TCG_TYPE_V128:
146
+ default:
41
+ case TCG_TYPE_V256:
147
+ g_assert_not_reached();
42
+ not_op = INDEX_op_not_vec;
148
+ }
43
+ have_not = TCG_TARGET_HAS_not_vec;
149
+
150
+ return int128_make128(b, a);
151
+}
152
+
153
static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
154
MMUAccessType type, uintptr_t ra)
155
{
156
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
157
return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
158
}
159
160
+static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr,
161
+ MemOpIdx oi, uintptr_t ra)
162
+{
163
+ MMULookupLocals l;
164
+ bool crosspage;
165
+ uint64_t a, b;
166
+ Int128 ret;
167
+ int first;
168
+
169
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD, &l);
170
+ if (likely(!crosspage)) {
171
+ /* Perform the load host endian. */
172
+ if (unlikely(l.page[0].flags & TLB_MMIO)) {
173
+ QEMU_IOTHREAD_LOCK_GUARD();
174
+ a = io_readx(env, l.page[0].full, l.mmu_idx, addr,
175
+ ra, MMU_DATA_LOAD, MO_64);
176
+ b = io_readx(env, l.page[0].full, l.mmu_idx, addr + 8,
177
+ ra, MMU_DATA_LOAD, MO_64);
178
+ ret = int128_make128(HOST_BIG_ENDIAN ? b : a,
179
+ HOST_BIG_ENDIAN ? a : b);
180
+ } else {
181
+ ret = load_atom_16(env, ra, l.page[0].haddr, l.memop);
182
+ }
183
+ if (l.memop & MO_BSWAP) {
184
+ ret = bswap128(ret);
185
+ }
186
+ return ret;
187
+ }
188
+
189
+ first = l.page[0].size;
190
+ if (first == 8) {
191
+ MemOp mop8 = (l.memop & ~MO_SIZE) | MO_64;
192
+
193
+ a = do_ld_8(env, &l.page[0], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
194
+ b = do_ld_8(env, &l.page[1], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
195
+ if ((mop8 & MO_BSWAP) == MO_LE) {
196
+ ret = int128_make128(a, b);
197
+ } else {
198
+ ret = int128_make128(b, a);
199
+ }
200
+ return ret;
201
+ }
202
+
203
+ if (first < 8) {
204
+ a = do_ld_beN(env, &l.page[0], 0, l.mmu_idx,
205
+ MMU_DATA_LOAD, l.memop, ra);
206
+ ret = do_ld16_beN(env, &l.page[1], a, l.mmu_idx, l.memop, ra);
207
+ } else {
208
+ ret = do_ld16_beN(env, &l.page[0], 0, l.mmu_idx, l.memop, ra);
209
+ b = int128_getlo(ret);
210
+ ret = int128_lshift(ret, l.page[1].size * 8);
211
+ a = int128_gethi(ret);
212
+ b = do_ld_beN(env, &l.page[1], b, l.mmu_idx,
213
+ MMU_DATA_LOAD, l.memop, ra);
214
+ ret = int128_make128(b, a);
215
+ }
216
+ if ((l.memop & MO_BSWAP) == MO_LE) {
217
+ ret = bswap128(ret);
218
+ }
219
+ return ret;
220
+}
221
+
222
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
223
+ uint32_t oi, uintptr_t retaddr)
224
+{
225
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
226
+ return do_ld16_mmu(env, addr, oi, retaddr);
227
+}
228
+
229
+Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, uint32_t oi)
230
+{
231
+ return helper_ld16_mmu(env, addr, oi, GETPC());
232
+}
233
+
234
/*
235
* Load helpers for cpu_ldst.h.
236
*/
237
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
238
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
239
MemOpIdx oi, uintptr_t ra)
240
{
241
- MemOp mop = get_memop(oi);
242
- int mmu_idx = get_mmuidx(oi);
243
- MemOpIdx new_oi;
244
- unsigned a_bits;
245
- uint64_t h, l;
246
+ Int128 ret;
247
248
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
249
- a_bits = get_alignment_bits(mop);
250
-
251
- /* Handle CPU specific unaligned behaviour */
252
- if (addr & ((1 << a_bits) - 1)) {
253
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
254
- mmu_idx, ra);
255
- }
256
-
257
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
258
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
259
- new_oi = make_memop_idx(mop, mmu_idx);
260
-
261
- h = helper_ldq_mmu(env, addr, new_oi, ra);
262
- l = helper_ldq_mmu(env, addr + 8, new_oi, ra);
263
-
264
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
265
- return int128_make128(l, h);
266
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
267
+ ret = do_ld16_mmu(env, addr, oi, ra);
268
+ plugin_load_cb(env, addr, oi);
269
+ return ret;
270
}
271
272
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
273
MemOpIdx oi, uintptr_t ra)
274
{
275
- MemOp mop = get_memop(oi);
276
- int mmu_idx = get_mmuidx(oi);
277
- MemOpIdx new_oi;
278
- unsigned a_bits;
279
- uint64_t h, l;
280
+ Int128 ret;
281
282
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
283
- a_bits = get_alignment_bits(mop);
284
-
285
- /* Handle CPU specific unaligned behaviour */
286
- if (addr & ((1 << a_bits) - 1)) {
287
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
288
- mmu_idx, ra);
289
- }
290
-
291
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
292
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
293
- new_oi = make_memop_idx(mop, mmu_idx);
294
-
295
- l = helper_ldq_mmu(env, addr, new_oi, ra);
296
- h = helper_ldq_mmu(env, addr + 8, new_oi, ra);
297
-
298
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
299
- return int128_make128(l, h);
300
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
301
+ ret = do_ld16_mmu(env, addr, oi, ra);
302
+ plugin_load_cb(env, addr, oi);
303
+ return ret;
304
}
305
306
/*
307
@@ -XXX,XX +XXX,XX @@ static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
308
}
309
}
310
311
+/*
312
+ * Wrapper for the above, for 8 < size < 16.
313
+ */
314
+static uint64_t do_st16_leN(CPUArchState *env, MMULookupPageData *p,
315
+ Int128 val_le, int mmu_idx,
316
+ MemOp mop, uintptr_t ra)
317
+{
318
+ int size = p->size;
319
+ MemOp atom;
320
+
321
+ if (unlikely(p->flags & TLB_MMIO)) {
322
+ p->size = 8;
323
+ do_st_mmio_leN(env, p, int128_getlo(val_le), mmu_idx, ra);
324
+ p->size = size - 8;
325
+ p->addr += 8;
326
+ return do_st_mmio_leN(env, p, int128_gethi(val_le), mmu_idx, ra);
327
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
328
+ return int128_gethi(val_le) >> ((size - 8) * 8);
329
+ }
330
+
331
+ /*
332
+ * It is a given that we cross a page and therefore there is no atomicity
333
+ * for the store as a whole, but subobjects may need attention.
334
+ */
335
+ atom = mop & MO_ATOM_MASK;
336
+ switch (atom) {
337
+ case MO_ATOM_SUBALIGN:
338
+ store_parts_leN(p->haddr, 8, int128_getlo(val_le));
339
+ return store_parts_leN(p->haddr + 8, p->size - 8,
340
+ int128_gethi(val_le));
341
+
342
+ case MO_ATOM_WITHIN16_PAIR:
343
+ /* Since size > 8, this is the half that must be atomic. */
344
+ if (!HAVE_al16) {
345
+ cpu_loop_exit_atomic(env_cpu(env), ra);
346
+ }
347
+ return store_whole_le16(p->haddr, p->size, val_le);
348
+
349
+ case MO_ATOM_IFALIGN_PAIR:
350
+ /*
351
+ * Since size > 8, both halves are misaligned,
352
+ * and so neither is atomic.
353
+ */
354
+ case MO_ATOM_IFALIGN:
355
+ case MO_ATOM_NONE:
356
+ stq_le_p(p->haddr, int128_getlo(val_le));
357
+ return store_bytes_leN(p->haddr + 8, p->size - 8,
358
+ int128_gethi(val_le));
359
+
360
+ default:
361
+ g_assert_not_reached();
362
+ }
363
+}
364
+
365
static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
366
int mmu_idx, uintptr_t ra)
367
{
368
@@ -XXX,XX +XXX,XX @@ void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
369
do_st8_mmu(env, addr, val, oi, retaddr);
370
}
371
372
+static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
373
+ MemOpIdx oi, uintptr_t ra)
374
+{
375
+ MMULookupLocals l;
376
+ bool crosspage;
377
+ uint64_t a, b;
378
+ int first;
379
+
380
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
381
+ if (likely(!crosspage)) {
382
+ /* Swap to host endian if necessary, then store. */
383
+ if (l.memop & MO_BSWAP) {
384
+ val = bswap128(val);
385
+ }
386
+ if (unlikely(l.page[0].flags & TLB_MMIO)) {
387
+ QEMU_IOTHREAD_LOCK_GUARD();
388
+ if (HOST_BIG_ENDIAN) {
389
+ b = int128_getlo(val), a = int128_gethi(val);
390
+ } else {
391
+ a = int128_getlo(val), b = int128_gethi(val);
392
+ }
393
+ io_writex(env, l.page[0].full, l.mmu_idx, a, addr, ra, MO_64);
394
+ io_writex(env, l.page[0].full, l.mmu_idx, b, addr + 8, ra, MO_64);
395
+ } else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
396
+ /* nothing */
397
+ } else {
398
+ store_atom_16(env, ra, l.page[0].haddr, l.memop, val);
399
+ }
400
+ return;
401
+ }
402
+
403
+ first = l.page[0].size;
404
+ if (first == 8) {
405
+ MemOp mop8 = (l.memop & ~(MO_SIZE | MO_BSWAP)) | MO_64;
406
+
407
+ if (l.memop & MO_BSWAP) {
408
+ val = bswap128(val);
409
+ }
410
+ if (HOST_BIG_ENDIAN) {
411
+ b = int128_getlo(val), a = int128_gethi(val);
412
+ } else {
413
+ a = int128_getlo(val), b = int128_gethi(val);
414
+ }
415
+ do_st_8(env, &l.page[0], a, l.mmu_idx, mop8, ra);
416
+ do_st_8(env, &l.page[1], b, l.mmu_idx, mop8, ra);
417
+ return;
418
+ }
419
+
420
+ if ((l.memop & MO_BSWAP) != MO_LE) {
421
+ val = bswap128(val);
422
+ }
423
+ if (first < 8) {
424
+ do_st_leN(env, &l.page[0], int128_getlo(val), l.mmu_idx, l.memop, ra);
425
+ val = int128_urshift(val, first * 8);
426
+ do_st16_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
427
+ } else {
428
+ b = do_st16_leN(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
429
+ do_st_leN(env, &l.page[1], b, l.mmu_idx, l.memop, ra);
430
+ }
431
+}
432
+
433
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
434
+ MemOpIdx oi, uintptr_t retaddr)
435
+{
436
+ tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
437
+ do_st16_mmu(env, addr, val, oi, retaddr);
438
+}
439
+
440
+void helper_st_i128(CPUArchState *env, target_ulong addr, Int128 val,
441
+ MemOpIdx oi)
442
+{
443
+ helper_st16_mmu(env, addr, val, oi, GETPC());
444
+}
445
+
446
/*
447
* Store Helpers for cpu_ldst.h
448
*/
449
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
450
plugin_store_cb(env, addr, oi);
451
}
452
453
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
454
- MemOpIdx oi, uintptr_t ra)
455
+void cpu_st16_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
456
+ MemOpIdx oi, uintptr_t retaddr)
457
{
458
- MemOp mop = get_memop(oi);
459
- int mmu_idx = get_mmuidx(oi);
460
- MemOpIdx new_oi;
461
- unsigned a_bits;
462
-
463
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
464
- a_bits = get_alignment_bits(mop);
465
-
466
- /* Handle CPU specific unaligned behaviour */
467
- if (addr & ((1 << a_bits) - 1)) {
468
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
469
- mmu_idx, ra);
470
- }
471
-
472
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
473
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
474
- new_oi = make_memop_idx(mop, mmu_idx);
475
-
476
- helper_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
477
- helper_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
478
-
479
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
480
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_BE|MO_128));
481
+ do_st16_mmu(env, addr, val, oi, retaddr);
482
+ plugin_store_cb(env, addr, oi);
483
}
484
485
-void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
486
- MemOpIdx oi, uintptr_t ra)
487
+void cpu_st16_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
488
+ MemOpIdx oi, uintptr_t retaddr)
489
{
490
- MemOp mop = get_memop(oi);
491
- int mmu_idx = get_mmuidx(oi);
492
- MemOpIdx new_oi;
493
- unsigned a_bits;
494
-
495
- tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
496
- a_bits = get_alignment_bits(mop);
497
-
498
- /* Handle CPU specific unaligned behaviour */
499
- if (addr & ((1 << a_bits) - 1)) {
500
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
501
- mmu_idx, ra);
502
- }
503
-
504
- /* Construct an unaligned 64-bit replacement MemOpIdx. */
505
- mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
506
- new_oi = make_memop_idx(mop, mmu_idx);
507
-
508
- helper_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
509
- helper_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
510
-
511
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
512
+ tcg_debug_assert((get_memop(oi) & (MO_BSWAP|MO_SIZE)) == (MO_LE|MO_128));
513
+ do_st16_mmu(env, addr, val, oi, retaddr);
514
+ plugin_store_cb(env, addr, oi);
515
}
516
517
#include "ldst_common.c.inc"
518
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
519
index XXXXXXX..XXXXXXX 100644
520
--- a/accel/tcg/user-exec.c
521
+++ b/accel/tcg/user-exec.c
522
@@ -XXX,XX +XXX,XX @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
523
return cpu_to_le64(ret);
524
}
525
526
-Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
527
- MemOpIdx oi, uintptr_t ra)
528
+static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
529
+ MemOp mop, uintptr_t ra)
530
{
531
void *haddr;
532
Int128 ret;
533
534
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
535
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
536
- memcpy(&ret, haddr, 16);
537
+ tcg_debug_assert((mop & MO_SIZE) == MO_128);
538
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
539
+ ret = load_atom_16(env, ra, haddr, mop);
540
clear_helper_retaddr();
541
- qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
542
+ return ret;
543
+}
544
545
+Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
546
+ MemOpIdx oi, uintptr_t ra)
547
+{
548
+ MemOp mop = get_memop(oi);
549
+ Int128 ret = do_ld16_he_mmu(env, addr, mop, ra);
550
+
551
+ if (mop & MO_BSWAP) {
552
+ ret = bswap128(ret);
553
+ }
554
+ return ret;
555
+}
556
+
557
+Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, MemOpIdx oi)
558
+{
559
+ return helper_ld16_mmu(env, addr, oi, GETPC());
560
+}
561
+
562
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
563
+ MemOpIdx oi, uintptr_t ra)
564
+{
565
+ MemOp mop = get_memop(oi);
566
+ Int128 ret;
567
+
568
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
569
+ ret = do_ld16_he_mmu(env, addr, mop, ra);
570
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
571
if (!HOST_BIG_ENDIAN) {
572
ret = bswap128(ret);
573
}
574
@@ -XXX,XX +XXX,XX @@ Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
575
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
576
MemOpIdx oi, uintptr_t ra)
577
{
578
- void *haddr;
579
+ MemOp mop = get_memop(oi);
580
Int128 ret;
581
582
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
583
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
584
- memcpy(&ret, haddr, 16);
585
- clear_helper_retaddr();
586
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
587
+ ret = do_ld16_he_mmu(env, addr, mop, ra);
588
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
589
-
590
if (HOST_BIG_ENDIAN) {
591
ret = bswap128(ret);
592
}
593
@@ -XXX,XX +XXX,XX @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
594
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
595
}
596
597
-void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
598
- Int128 val, MemOpIdx oi, uintptr_t ra)
599
+static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
600
+ MemOp mop, uintptr_t ra)
601
{
602
void *haddr;
603
604
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_BE));
605
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
606
+ tcg_debug_assert((mop & MO_SIZE) == MO_128);
607
+ haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
608
+ store_atom_16(env, ra, haddr, mop, val);
609
+ clear_helper_retaddr();
610
+}
611
+
612
+void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
613
+ MemOpIdx oi, uintptr_t ra)
614
+{
615
+ MemOp mop = get_memop(oi);
616
+
617
+ if (mop & MO_BSWAP) {
618
+ val = bswap128(val);
619
+ }
620
+ do_st16_he_mmu(env, addr, val, mop, ra);
621
+}
622
+
623
+void helper_st_i128(CPUArchState *env, target_ulong addr,
624
+ Int128 val, MemOpIdx oi)
625
+{
626
+ helper_st16_mmu(env, addr, val, oi, GETPC());
627
+}
628
+
629
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
630
+ Int128 val, MemOpIdx oi, uintptr_t ra)
631
+{
632
+ MemOp mop = get_memop(oi);
633
+
634
+ tcg_debug_assert((mop & MO_BSWAP) == MO_BE);
635
if (!HOST_BIG_ENDIAN) {
636
val = bswap128(val);
637
}
638
- memcpy(haddr, &val, 16);
639
- clear_helper_retaddr();
640
+ do_st16_he_mmu(env, addr, val, mop, ra);
641
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
642
}
643
644
void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
645
Int128 val, MemOpIdx oi, uintptr_t ra)
646
{
647
- void *haddr;
648
+ MemOp mop = get_memop(oi);
649
650
- tcg_debug_assert((get_memop(oi) & (MO_BSWAP | MO_SIZE)) == (MO_128 | MO_LE));
651
- haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
652
+ tcg_debug_assert((mop & MO_BSWAP) == MO_LE);
653
if (HOST_BIG_ENDIAN) {
654
val = bswap128(val);
655
}
656
- memcpy(haddr, &val, 16);
657
- clear_helper_retaddr();
658
+ do_st16_he_mmu(env, addr, val, mop, ra);
659
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
660
}
661
662
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
663
index XXXXXXX..XXXXXXX 100644
664
--- a/tcg/tcg-op.c
665
+++ b/tcg/tcg-op.c
666
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
667
}
668
}
669
670
+/*
671
+ * Return true if @mop, without knowledge of the pointer alignment,
672
+ * does not require 16-byte atomicity, and it would be advantageous
673
+ * to avoid a call to a helper function.
674
+ */
675
+static bool use_two_i64_for_i128(MemOp mop)
676
+{
677
+#ifdef CONFIG_SOFTMMU
678
+ /* Two softmmu tlb lookups is larger than one function call. */
679
+ return false;
680
+#else
681
+ /*
682
+ * For user-only, two 64-bit operations may well be smaller than a call.
683
+ * Determine if that would be legal for the requested atomicity.
684
+ */
685
+ switch (mop & MO_ATOM_MASK) {
686
+ case MO_ATOM_NONE:
687
+ case MO_ATOM_IFALIGN_PAIR:
688
+ return true;
689
+ case MO_ATOM_IFALIGN:
690
+ case MO_ATOM_SUBALIGN:
691
+ case MO_ATOM_WITHIN16:
692
+ case MO_ATOM_WITHIN16_PAIR:
693
+ /* In a serialized context, no atomicity is required. */
694
+ return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
695
+ default:
696
+ g_assert_not_reached();
697
+ }
698
+#endif
699
+}
700
+
701
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
702
{
703
MemOp mop_1 = orig, mop_2;
704
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
705
ret[1] = mop_2;
706
}
707
708
+#if TARGET_LONG_BITS == 64
709
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
710
+#else
711
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
712
+#endif
713
+
714
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
715
{
716
- MemOp mop[2];
717
- TCGv addr_p8;
718
- TCGv_i64 x, y;
719
+ MemOpIdx oi = make_memop_idx(memop, idx);
720
721
- canonicalize_memop_i128_as_i64(mop, memop);
722
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
723
+ tcg_debug_assert((memop & MO_SIGN) == 0);
724
725
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
726
addr = plugin_prep_mem_callbacks(addr);
727
728
- /* TODO: respect atomicity of the operation. */
729
/* TODO: allow the tcg backend to see the whole operation. */
730
731
- /*
732
- * Since there are no global TCGv_i128, there is no visible state
733
- * changed if the second load faults. Load directly into the two
734
- * subwords.
735
- */
736
- if ((memop & MO_BSWAP) == MO_LE) {
737
- x = TCGV128_LOW(val);
738
- y = TCGV128_HIGH(val);
739
+ if (use_two_i64_for_i128(memop)) {
740
+ MemOp mop[2];
741
+ TCGv addr_p8;
742
+ TCGv_i64 x, y;
743
+
744
+ canonicalize_memop_i128_as_i64(mop, memop);
745
+
746
+ /*
747
+ * Since there are no global TCGv_i128, there is no visible state
748
+ * changed if the second load faults. Load directly into the two
749
+ * subwords.
750
+ */
751
+ if ((memop & MO_BSWAP) == MO_LE) {
752
+ x = TCGV128_LOW(val);
753
+ y = TCGV128_HIGH(val);
754
+ } else {
755
+ x = TCGV128_HIGH(val);
756
+ y = TCGV128_LOW(val);
757
+ }
758
+
759
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
760
+
761
+ if ((mop[0] ^ memop) & MO_BSWAP) {
762
+ tcg_gen_bswap64_i64(x, x);
763
+ }
764
+
765
+ addr_p8 = tcg_temp_ebb_new();
766
+ tcg_gen_addi_tl(addr_p8, addr, 8);
767
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
768
+ tcg_temp_free(addr_p8);
769
+
770
+ if ((mop[0] ^ memop) & MO_BSWAP) {
771
+ tcg_gen_bswap64_i64(y, y);
772
+ }
773
} else {
774
- x = TCGV128_HIGH(val);
775
- y = TCGV128_LOW(val);
776
+ gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
777
}
778
779
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
780
-
781
- if ((mop[0] ^ memop) & MO_BSWAP) {
782
- tcg_gen_bswap64_i64(x, x);
783
- }
784
-
785
- addr_p8 = tcg_temp_new();
786
- tcg_gen_addi_tl(addr_p8, addr, 8);
787
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
788
- tcg_temp_free(addr_p8);
789
-
790
- if ((mop[0] ^ memop) & MO_BSWAP) {
791
- tcg_gen_bswap64_i64(y, y);
792
- }
793
-
794
- plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
795
- QEMU_PLUGIN_MEM_R);
796
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
797
}
798
799
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
800
{
801
- MemOp mop[2];
802
- TCGv addr_p8;
803
- TCGv_i64 x, y;
804
+ MemOpIdx oi = make_memop_idx(memop, idx);
805
806
- canonicalize_memop_i128_as_i64(mop, memop);
807
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
808
+ tcg_debug_assert((memop & MO_SIGN) == 0);
809
810
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
811
addr = plugin_prep_mem_callbacks(addr);
812
813
- /* TODO: respect atomicity of the operation. */
814
/* TODO: allow the tcg backend to see the whole operation. */
815
816
- if ((memop & MO_BSWAP) == MO_LE) {
817
- x = TCGV128_LOW(val);
818
- y = TCGV128_HIGH(val);
819
+ if (use_two_i64_for_i128(memop)) {
820
+ MemOp mop[2];
821
+ TCGv addr_p8;
822
+ TCGv_i64 x, y;
823
+
824
+ canonicalize_memop_i128_as_i64(mop, memop);
825
+
826
+ if ((memop & MO_BSWAP) == MO_LE) {
827
+ x = TCGV128_LOW(val);
828
+ y = TCGV128_HIGH(val);
829
+ } else {
830
+ x = TCGV128_HIGH(val);
831
+ y = TCGV128_LOW(val);
832
+ }
833
+
834
+ addr_p8 = tcg_temp_ebb_new();
835
+ if ((mop[0] ^ memop) & MO_BSWAP) {
836
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
837
+
838
+ tcg_gen_bswap64_i64(t, x);
839
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
840
+ tcg_gen_bswap64_i64(t, y);
841
+ tcg_gen_addi_tl(addr_p8, addr, 8);
842
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
843
+ tcg_temp_free_i64(t);
844
+ } else {
845
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
846
+ tcg_gen_addi_tl(addr_p8, addr, 8);
847
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
848
+ }
849
+ tcg_temp_free(addr_p8);
850
} else {
851
- x = TCGV128_HIGH(val);
852
- y = TCGV128_LOW(val);
853
+ gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
854
}
855
856
- addr_p8 = tcg_temp_new();
857
- if ((mop[0] ^ memop) & MO_BSWAP) {
858
- TCGv_i64 t = tcg_temp_ebb_new_i64();
859
-
860
- tcg_gen_bswap64_i64(t, x);
861
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
862
- tcg_gen_bswap64_i64(t, y);
863
- tcg_gen_addi_tl(addr_p8, addr, 8);
864
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
865
- tcg_temp_free_i64(t);
866
- } else {
867
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
868
- tcg_gen_addi_tl(addr_p8, addr, 8);
869
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
870
- }
871
- tcg_temp_free(addr_p8);
872
-
873
- plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
874
- QEMU_PLUGIN_MEM_W);
875
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
876
}
877
878
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
879
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
880
index XXXXXXX..XXXXXXX 100644
881
--- a/accel/tcg/ldst_atomicity.c.inc
882
+++ b/accel/tcg/ldst_atomicity.c.inc
883
@@ -XXX,XX +XXX,XX @@ static inline uint64_t load_atom_8_by_4(void *pv)
884
}
885
}
886
887
+/**
888
+ * load_atom_8_by_8_or_4:
889
+ * @pv: host address
890
+ *
891
+ * Load 8 bytes from aligned @pv, with at least 4-byte atomicity.
892
+ */
893
+static inline uint64_t load_atom_8_by_8_or_4(void *pv)
894
+{
895
+ if (HAVE_al8_fast) {
896
+ return load_atomic8(pv);
897
+ } else {
898
+ return load_atom_8_by_4(pv);
899
+ }
900
+}
901
+
902
/**
903
* load_atom_2:
904
* @p: host address
905
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_8(CPUArchState *env, uintptr_t ra,
906
}
907
}
908
909
+/**
910
+ * load_atom_16:
911
+ * @p: host address
912
+ * @memop: the full memory op
913
+ *
914
+ * Load 16 bytes from @p, honoring the atomicity of @memop.
915
+ */
916
+static Int128 load_atom_16(CPUArchState *env, uintptr_t ra,
917
+ void *pv, MemOp memop)
918
+{
919
+ uintptr_t pi = (uintptr_t)pv;
920
+ int atmax;
921
+ Int128 r;
922
+ uint64_t a, b;
923
+
924
+ /*
925
+ * If the host does not support 16-byte atomics, wait until we have
926
+ * examined the atomicity parameters below.
927
+ */
928
+ if (HAVE_al16_fast && likely((pi & 15) == 0)) {
929
+ return load_atomic16(pv);
930
+ }
931
+
932
+ atmax = required_atomicity(env, pi, memop);
933
+ switch (atmax) {
934
+ case MO_8:
935
+ memcpy(&r, pv, 16);
936
+ return r;
937
+ case MO_16:
938
+ a = load_atom_8_by_2(pv);
939
+ b = load_atom_8_by_2(pv + 8);
940
+ break;
941
+ case MO_32:
942
+ a = load_atom_8_by_4(pv);
943
+ b = load_atom_8_by_4(pv + 8);
944
+ break;
945
+ case MO_64:
946
+ if (!HAVE_al8) {
947
+ cpu_loop_exit_atomic(env_cpu(env), ra);
948
+ }
949
+ a = load_atomic8(pv);
950
+ b = load_atomic8(pv + 8);
951
+ break;
952
+ case -MO_64:
953
+ if (!HAVE_al8) {
954
+ cpu_loop_exit_atomic(env_cpu(env), ra);
955
+ }
956
+ a = load_atom_extract_al8x2(pv);
957
+ b = load_atom_extract_al8x2(pv + 8);
958
+ break;
959
+ case MO_128:
960
+ return load_atomic16_or_exit(env, ra, pv);
961
+ default:
962
+ g_assert_not_reached();
963
+ }
964
+ return int128_make128(HOST_BIG_ENDIAN ? b : a, HOST_BIG_ENDIAN ? a : b);
965
+}
966
+
967
/**
968
* store_atomic2:
969
* @pv: host address
970
@@ -XXX,XX +XXX,XX @@ static inline void store_atomic8(void *pv, uint64_t val)
971
qatomic_set__nocheck(p, val);
972
}
973
974
+/**
975
+ * store_atomic16:
976
+ * @pv: host address
977
+ * @val: value to store
978
+ *
979
+ * Atomically store 16 aligned bytes to @pv.
980
+ */
981
+static inline void store_atomic16(void *pv, Int128Alias val)
982
+{
983
+#if defined(CONFIG_ATOMIC128)
984
+ __uint128_t *pu = __builtin_assume_aligned(pv, 16);
985
+ qatomic_set__nocheck(pu, val.u);
986
+#elif defined(CONFIG_CMPXCHG128)
987
+ __uint128_t *pu = __builtin_assume_aligned(pv, 16);
988
+ __uint128_t o;
989
+
990
+ /*
991
+ * Without CONFIG_ATOMIC128, __atomic_compare_exchange_n will always
992
+ * defer to libatomic, so we must use __sync_*_compare_and_swap_16
993
+ * and accept the sequential consistency that comes with it.
994
+ */
995
+ do {
996
+ o = *pu;
997
+ } while (!__sync_bool_compare_and_swap_16(pu, o, val.u));
998
+#else
999
+ qemu_build_not_reached();
1000
+#endif
1001
+}
1002
+
1003
/**
1004
* store_atom_4x2
1005
*/
1006
@@ -XXX,XX +XXX,XX @@ static void store_atom_8(CPUArchState *env, uintptr_t ra,
1007
}
1008
cpu_loop_exit_atomic(env_cpu(env), ra);
1009
}
1010
+
1011
+/**
1012
+ * store_atom_16:
1013
+ * @p: host address
1014
+ * @val: the value to store
1015
+ * @memop: the full memory op
1016
+ *
1017
+ * Store 16 bytes to @p, honoring the atomicity of @memop.
1018
+ */
1019
+static void store_atom_16(CPUArchState *env, uintptr_t ra,
1020
+ void *pv, MemOp memop, Int128 val)
1021
+{
1022
+ uintptr_t pi = (uintptr_t)pv;
1023
+ uint64_t a, b;
1024
+ int atmax;
1025
+
1026
+ if (HAVE_al16_fast && likely((pi & 15) == 0)) {
1027
+ store_atomic16(pv, val);
1028
+ return;
1029
+ }
1030
+
1031
+ atmax = required_atomicity(env, pi, memop);
1032
+
1033
+ a = HOST_BIG_ENDIAN ? int128_gethi(val) : int128_getlo(val);
1034
+ b = HOST_BIG_ENDIAN ? int128_getlo(val) : int128_gethi(val);
1035
+ switch (atmax) {
1036
+ case MO_8:
1037
+ memcpy(pv, &val, 16);
1038
+ return;
1039
+ case MO_16:
1040
+ store_atom_8_by_2(pv, a);
1041
+ store_atom_8_by_2(pv + 8, b);
1042
+ return;
1043
+ case MO_32:
1044
+ store_atom_8_by_4(pv, a);
1045
+ store_atom_8_by_4(pv + 8, b);
1046
+ return;
1047
+ case MO_64:
1048
+ if (HAVE_al8) {
1049
+ store_atomic8(pv, a);
1050
+ store_atomic8(pv + 8, b);
1051
+ return;
1052
+ }
1053
+ break;
1054
+ case -MO_64:
1055
+ if (HAVE_al16) {
1056
+ uint64_t val_le;
1057
+ int s2 = pi & 15;
1058
+ int s1 = 16 - s2;
1059
+
1060
+ if (HOST_BIG_ENDIAN) {
1061
+ val = bswap128(val);
1062
+ }
1063
+ switch (s2) {
1064
+ case 1 ... 7:
1065
+ val_le = store_whole_le16(pv, s1, val);
1066
+ store_bytes_leN(pv + s1, s2, val_le);
1067
+ break;
1068
+ case 9 ... 15:
1069
+ store_bytes_leN(pv, s1, int128_getlo(val));
1070
+ val = int128_urshift(val, s1 * 8);
1071
+ store_whole_le16(pv + s1, s2, val);
1072
+ break;
1073
+ case 0: /* aligned */
1074
+ case 8: /* atmax MO_64 */
1075
+ default:
1076
+ g_assert_not_reached();
1077
+ }
1078
+ return;
1079
+ }
1080
+ break;
1081
+ case MO_128:
1082
+ if (HAVE_al16) {
1083
+ store_atomic16(pv, val);
1084
+ return;
1085
+ }
44
+ break;
1086
+ break;
45
+ default:
1087
+ default:
46
+ g_assert_not_reached();
1088
+ g_assert_not_reached();
47
+ }
1089
+ }
48
+ if (have_not) {
1090
+ cpu_loop_exit_atomic(env_cpu(env), ra);
49
+ op->opc = not_op;
1091
+}
50
+ op->args[1] = op->args[idx];
51
+ return fold_not(ctx, op);
52
+ }
53
+ return false;
54
+}
55
+
56
+/* If the binary operation has first argument @i, fold to NOT. */
57
+static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
58
+{
59
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
60
+ return fold_to_not(ctx, op, 2);
61
+ }
62
+ return false;
63
+}
64
+
65
/* If the binary operation has second argument @i, fold to @i. */
66
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
67
{
68
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
69
return false;
70
}
71
72
+/* If the binary operation has second argument @i, fold to NOT. */
73
+static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
74
+{
75
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
76
+ return fold_to_not(ctx, op, 1);
77
+ }
78
+ return false;
79
+}
80
+
81
/* If the binary operation has both arguments equal, fold to @i. */
82
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
83
{
84
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
85
static bool fold_andc(OptContext *ctx, TCGOp *op)
86
{
87
if (fold_const2(ctx, op) ||
88
- fold_xx_to_i(ctx, op, 0)) {
89
+ fold_xx_to_i(ctx, op, 0) ||
90
+ fold_ix_to_not(ctx, op, -1)) {
91
return true;
92
}
93
return false;
94
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
95
96
static bool fold_eqv(OptContext *ctx, TCGOp *op)
97
{
98
- return fold_const2(ctx, op);
99
+ if (fold_const2(ctx, op) ||
100
+ fold_xi_to_not(ctx, op, 0)) {
101
+ return true;
102
+ }
103
+ return false;
104
}
105
106
static bool fold_extract(OptContext *ctx, TCGOp *op)
107
@@ -XXX,XX +XXX,XX @@ static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
108
109
static bool fold_nand(OptContext *ctx, TCGOp *op)
110
{
111
- return fold_const2(ctx, op);
112
+ if (fold_const2(ctx, op) ||
113
+ fold_xi_to_not(ctx, op, -1)) {
114
+ return true;
115
+ }
116
+ return false;
117
}
118
119
static bool fold_neg(OptContext *ctx, TCGOp *op)
120
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
121
122
static bool fold_nor(OptContext *ctx, TCGOp *op)
123
{
124
- return fold_const2(ctx, op);
125
+ if (fold_const2(ctx, op) ||
126
+ fold_xi_to_not(ctx, op, 0)) {
127
+ return true;
128
+ }
129
+ return false;
130
}
131
132
static bool fold_not(OptContext *ctx, TCGOp *op)
133
{
134
- return fold_const1(ctx, op);
135
+ if (fold_const1(ctx, op)) {
136
+ return true;
137
+ }
138
+
139
+ /* Because of fold_to_not, we want to always return true, via finish. */
140
+ finish_folding(ctx, op);
141
+ return true;
142
}
143
144
static bool fold_or(OptContext *ctx, TCGOp *op)
145
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
146
147
static bool fold_orc(OptContext *ctx, TCGOp *op)
148
{
149
- return fold_const2(ctx, op);
150
+ if (fold_const2(ctx, op) ||
151
+ fold_ix_to_not(ctx, op, 0)) {
152
+ return true;
153
+ }
154
+ return false;
155
}
156
157
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
158
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
159
static bool fold_xor(OptContext *ctx, TCGOp *op)
160
{
161
if (fold_const2(ctx, op) ||
162
- fold_xx_to_i(ctx, op, 0)) {
163
+ fold_xx_to_i(ctx, op, 0) ||
164
+ fold_xi_to_not(ctx, op, -1)) {
165
return true;
166
}
167
return false;
168
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
169
}
170
}
171
break;
172
- CASE_OP_32_64_VEC(xor):
173
- CASE_OP_32_64(nand):
174
- if (!arg_is_const(op->args[1])
175
- && arg_is_const(op->args[2])
176
- && arg_info(op->args[2])->val == -1) {
177
- i = 1;
178
- goto try_not;
179
- }
180
- break;
181
- CASE_OP_32_64(nor):
182
- if (!arg_is_const(op->args[1])
183
- && arg_is_const(op->args[2])
184
- && arg_info(op->args[2])->val == 0) {
185
- i = 1;
186
- goto try_not;
187
- }
188
- break;
189
- CASE_OP_32_64_VEC(andc):
190
- if (!arg_is_const(op->args[2])
191
- && arg_is_const(op->args[1])
192
- && arg_info(op->args[1])->val == -1) {
193
- i = 2;
194
- goto try_not;
195
- }
196
- break;
197
- CASE_OP_32_64_VEC(orc):
198
- CASE_OP_32_64(eqv):
199
- if (!arg_is_const(op->args[2])
200
- && arg_is_const(op->args[1])
201
- && arg_info(op->args[1])->val == 0) {
202
- i = 2;
203
- goto try_not;
204
- }
205
- break;
206
- try_not:
207
- {
208
- TCGOpcode not_op;
209
- bool have_not;
210
-
211
- switch (ctx.type) {
212
- case TCG_TYPE_I32:
213
- not_op = INDEX_op_not_i32;
214
- have_not = TCG_TARGET_HAS_not_i32;
215
- break;
216
- case TCG_TYPE_I64:
217
- not_op = INDEX_op_not_i64;
218
- have_not = TCG_TARGET_HAS_not_i64;
219
- break;
220
- case TCG_TYPE_V64:
221
- case TCG_TYPE_V128:
222
- case TCG_TYPE_V256:
223
- not_op = INDEX_op_not_vec;
224
- have_not = TCG_TARGET_HAS_not_vec;
225
- break;
226
- default:
227
- g_assert_not_reached();
228
- }
229
- if (!have_not) {
230
- break;
231
- }
232
- op->opc = not_op;
233
- reset_temp(op->args[0]);
234
- op->args[1] = op->args[i];
235
- continue;
236
- }
237
default:
238
break;
239
}
240
--
1092
--
241
2.25.1
1093
2.34.1
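
As a usage sketch of the new 128-bit interface (an illustration, not part of
the patch): a target helper builds a MemOpIdx that carries the byte order, the
size and one of the new MO_ATOM_* policies, and cpu_ld16_be_mmu honors the
requested atomicity. The function name and the particular flag combination
below are assumptions; the called names come from the hunks above.

    /* Hypothetical caller: 16-byte big-endian load, atomic if aligned. */
    static Int128 sketch_ld16_be(CPUArchState *env, target_ulong addr,
                                 int mmu_idx)
    {
        MemOpIdx oi = make_memop_idx(MO_BE | MO_128 | MO_ALIGN
                                     | MO_ATOM_IFALIGN, mmu_idx);

        return cpu_ld16_be_mmu(env, addr, oi, GETPC());
    }
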
242
243
New patch
1
There is an edge condition prior to gcc13 for which optimization
2
is required to generate 16-byte atomic sequences. Detect this.
1
3
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
meson.build | 52 ++++++++++++++++++++++------------
8
accel/tcg/ldst_atomicity.c.inc | 29 ++++++++++++++++---
9
2 files changed, 59 insertions(+), 22 deletions(-)
10
11
diff --git a/meson.build b/meson.build
12
index XXXXXXX..XXXXXXX 100644
13
--- a/meson.build
14
+++ b/meson.build
15
@@ -XXX,XX +XXX,XX @@ config_host_data.set('HAVE_BROKEN_SIZE_MAX', not cc.compiles('''
16
return printf("%zu", SIZE_MAX);
17
}''', args: ['-Werror']))
18
19
-atomic_test = '''
20
+# See if 64-bit atomic operations are supported.
21
+# Note that without __atomic builtins, we can only
22
+# assume atomic loads/stores max at pointer size.
23
+config_host_data.set('CONFIG_ATOMIC64', cc.links('''
24
#include <stdint.h>
25
int main(void)
26
{
27
- @0@ x = 0, y = 0;
28
+ uint64_t x = 0, y = 0;
29
y = __atomic_load_n(&x, __ATOMIC_RELAXED);
30
__atomic_store_n(&x, y, __ATOMIC_RELAXED);
31
__atomic_compare_exchange_n(&x, &y, x, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
32
__atomic_exchange_n(&x, y, __ATOMIC_RELAXED);
33
__atomic_fetch_add(&x, y, __ATOMIC_RELAXED);
34
return 0;
35
- }'''
36
-
37
-# See if 64-bit atomic operations are supported.
38
-# Note that without __atomic builtins, we can only
39
-# assume atomic loads/stores max at pointer size.
40
-config_host_data.set('CONFIG_ATOMIC64', cc.links(atomic_test.format('uint64_t')))
41
+ }'''))
42
43
has_int128 = cc.links('''
44
__int128_t a;
45
@@ -XXX,XX +XXX,XX @@ if has_int128
46
# "do we have 128-bit atomics which are handled inline and specifically not
47
# via libatomic". The reason we can't use libatomic is documented in the
48
# comment starting "GCC is a house divided" in include/qemu/atomic128.h.
49
- has_atomic128 = cc.links(atomic_test.format('unsigned __int128'))
50
+ # We only care about these operations on 16-byte aligned pointers, so
51
+ # force 16-byte alignment of the pointer, which may be greater than
52
+ # __alignof(unsigned __int128) for the host.
53
+ atomic_test_128 = '''
54
+ int main(int ac, char **av) {
55
+ unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16);
56
+ p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
57
+ __atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
58
+ __atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
59
+ return 0;
60
+ }'''
61
+ has_atomic128 = cc.links(atomic_test_128)
62
63
config_host_data.set('CONFIG_ATOMIC128', has_atomic128)
64
65
if not has_atomic128
66
- has_cmpxchg128 = cc.links('''
67
- int main(void)
68
- {
69
- unsigned __int128 x = 0, y = 0;
70
- __sync_val_compare_and_swap_16(&x, y, x);
71
- return 0;
72
- }
73
- ''')
74
+ # Even with __builtin_assume_aligned, the above test may have failed
75
+ # without optimization enabled. Try again with optimizations locally
76
+ # enabled for the function. See
77
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
78
+ has_atomic128_opt = cc.links('__attribute__((optimize("O1")))' + atomic_test_128)
79
+ config_host_data.set('CONFIG_ATOMIC128_OPT', has_atomic128_opt)
80
81
- config_host_data.set('CONFIG_CMPXCHG128', has_cmpxchg128)
82
+ if not has_atomic128_opt
83
+ config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
84
+ int main(void)
85
+ {
86
+ unsigned __int128 x = 0, y = 0;
87
+ __sync_val_compare_and_swap_16(&x, y, x);
88
+ return 0;
89
+ }
90
+ '''))
91
+ endif
92
endif
93
endif
94
95
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
96
index XXXXXXX..XXXXXXX 100644
97
--- a/accel/tcg/ldst_atomicity.c.inc
98
+++ b/accel/tcg/ldst_atomicity.c.inc
99
@@ -XXX,XX +XXX,XX @@
100
#endif
101
#define HAVE_al8_fast (ATOMIC_REG_SIZE >= 8)
102
103
+/*
104
+ * If __alignof(unsigned __int128) < 16, GCC may refuse to inline atomics
105
+ * that are supported by the host, e.g. s390x. We can force the pointer to
106
+ * have our known alignment with __builtin_assume_aligned, however prior to
107
+ * GCC 13 that was only reliable with optimization enabled. See
108
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107389
109
+ */
110
+#if defined(CONFIG_ATOMIC128_OPT)
111
+# if !defined(__OPTIMIZE__)
112
+# define ATTRIBUTE_ATOMIC128_OPT __attribute__((optimize("O1")))
113
+# endif
114
+# define CONFIG_ATOMIC128
115
+#endif
116
+#ifndef ATTRIBUTE_ATOMIC128_OPT
117
+# define ATTRIBUTE_ATOMIC128_OPT
118
+#endif
119
+
120
#if defined(CONFIG_ATOMIC128)
121
# define HAVE_al16_fast true
122
#else
123
@@ -XXX,XX +XXX,XX @@ static inline uint64_t load_atomic8(void *pv)
124
*
125
* Atomically load 16 aligned bytes from @pv.
126
*/
127
-static inline Int128 load_atomic16(void *pv)
128
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
129
+load_atomic16(void *pv)
130
{
131
#ifdef CONFIG_ATOMIC128
132
__uint128_t *p = __builtin_assume_aligned(pv, 16);
133
@@ -XXX,XX +XXX,XX @@ static uint64_t load_atom_extract_al16_or_exit(CPUArchState *env, uintptr_t ra,
134
* cross an 16-byte boundary then the access must be 16-byte atomic,
135
* otherwise the access must be 8-byte atomic.
136
*/
137
-static inline uint64_t load_atom_extract_al16_or_al8(void *pv, int s)
138
+static inline uint64_t ATTRIBUTE_ATOMIC128_OPT
139
+load_atom_extract_al16_or_al8(void *pv, int s)
140
{
141
#if defined(CONFIG_ATOMIC128)
142
uintptr_t pi = (uintptr_t)pv;
143
@@ -XXX,XX +XXX,XX @@ static inline void store_atomic8(void *pv, uint64_t val)
144
*
145
* Atomically store 16 aligned bytes to @pv.
146
*/
147
-static inline void store_atomic16(void *pv, Int128Alias val)
148
+static inline void ATTRIBUTE_ATOMIC128_OPT
149
+store_atomic16(void *pv, Int128Alias val)
150
{
151
#if defined(CONFIG_ATOMIC128)
152
__uint128_t *pu = __builtin_assume_aligned(pv, 16);
153
@@ -XXX,XX +XXX,XX @@ static void store_atom_insert_al8(uint64_t *p, uint64_t val, uint64_t msk)
154
*
155
* Atomically store @val to @p masked by @msk.
156
*/
157
-static void store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
158
+static void ATTRIBUTE_ATOMIC128_OPT
159
+store_atom_insert_al16(Int128 *ps, Int128Alias val, Int128Alias msk)
160
{
161
#if defined(CONFIG_ATOMIC128)
162
__uint128_t *pu, old, new;
163
--
164
2.34.1
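
A standalone reproduction of the GCC < 13 behaviour that the probe above
guards against (assumed, based on the bug report cited in the patch): built
without optimization on an affected host such as s390x, the 16-byte atomic
load below may be emitted as an out-of-line libatomic call even though the
pointer is declared 16-byte aligned; with -O1, or the per-function optimize
attribute used above, the compiler inlines the atomic sequence.

    /* Compare the code generated with and without -O1; see gcc PR 107389. */
    unsigned __int128 load16_relaxed(void *pv)
    {
        unsigned __int128 *p = __builtin_assume_aligned(pv, 16);
        return __atomic_load_n(p, __ATOMIC_RELAXED);
    }
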
1
Recognize the constant function for remainder.
1
Notice when Intel or AMD have guaranteed that vmovdqa is atomic.
2
The new variable will also be used in generated code.
2
3
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/optimize.c | 6 +++++-
7
include/qemu/cpuid.h | 18 ++++++++++++++++++
8
1 file changed, 5 insertions(+), 1 deletion(-)
8
tcg/i386/tcg-target.h | 1 +
9
tcg/i386/tcg-target.c.inc | 27 +++++++++++++++++++++++++++
10
3 files changed, 46 insertions(+)
9
11
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
11
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
14
--- a/include/qemu/cpuid.h
13
+++ b/tcg/optimize.c
15
+++ b/include/qemu/cpuid.h
14
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@
15
17
#define bit_LZCNT (1 << 5)
16
static bool fold_remainder(OptContext *ctx, TCGOp *op)
18
#endif
19
20
+/*
21
+ * Signatures for different CPU implementations as returned from Leaf 0.
22
+ */
23
+
24
+#ifndef signature_INTEL_ecx
25
+/* "Genu" "ineI" "ntel" */
26
+#define signature_INTEL_ebx 0x756e6547
27
+#define signature_INTEL_edx 0x49656e69
28
+#define signature_INTEL_ecx 0x6c65746e
29
+#endif
30
+
31
+#ifndef signature_AMD_ecx
32
+/* "Auth" "enti" "cAMD" */
33
+#define signature_AMD_ebx 0x68747541
34
+#define signature_AMD_edx 0x69746e65
35
+#define signature_AMD_ecx 0x444d4163
36
+#endif
37
+
38
static inline unsigned xgetbv_low(unsigned c)
17
{
39
{
18
- return fold_const2(ctx, op);
40
unsigned a, d;
19
+ if (fold_const2(ctx, op) ||
41
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
20
+ fold_xx_to_i(ctx, op, 0)) {
42
index XXXXXXX..XXXXXXX 100644
21
+ return true;
43
--- a/tcg/i386/tcg-target.h
22
+ }
44
+++ b/tcg/i386/tcg-target.h
23
+ return false;
45
@@ -XXX,XX +XXX,XX @@ extern bool have_avx512dq;
24
}
46
extern bool have_avx512vbmi2;
25
47
extern bool have_avx512vl;
26
static bool fold_setcond(OptContext *ctx, TCGOp *op)
48
extern bool have_movbe;
49
+extern bool have_atomic16;
50
51
/* optional instructions */
52
#define TCG_TARGET_HAS_div2_i32 1
53
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
54
index XXXXXXX..XXXXXXX 100644
55
--- a/tcg/i386/tcg-target.c.inc
56
+++ b/tcg/i386/tcg-target.c.inc
57
@@ -XXX,XX +XXX,XX @@ bool have_avx512dq;
58
bool have_avx512vbmi2;
59
bool have_avx512vl;
60
bool have_movbe;
61
+bool have_atomic16;
62
63
#ifdef CONFIG_CPUID_H
64
static bool have_bmi2;
65
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
66
have_avx512dq = (b7 & bit_AVX512DQ) != 0;
67
have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
68
}
69
+
70
+ /*
71
+ * The Intel SDM has added:
72
+ * Processors that enumerate support for Intel® AVX
73
+ * (by setting the feature flag CPUID.01H:ECX.AVX[bit 28])
74
+ * guarantee that the 16-byte memory operations performed
75
+ * by the following instructions will always be carried
76
+ * out atomically:
77
+ * - MOVAPD, MOVAPS, and MOVDQA.
78
+ * - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
79
+ * - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
80
+ * with EVEX.128 and k0 (masking disabled).
81
+ * Note that these instructions require the linear addresses
82
+ * of their memory operands to be 16-byte aligned.
83
+ *
84
+ * AMD has provided an even stronger guarantee that processors
85
+ * with AVX provide 16-byte atomicity for all cacheable,
86
+ * naturally aligned single loads and stores, e.g. MOVDQU.
87
+ *
88
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
89
+ */
90
+ if (have_avx1) {
91
+ __cpuid(0, a, b, c, d);
92
+ have_atomic16 = (c == signature_INTEL_ecx ||
93
+ c == signature_AMD_ecx);
94
+ }
95
}
96
}
97
}
27
--
98
--
28
2.25.1
99
2.34.1
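
For reference, the vendor check reduces to a small standalone function (a
sketch, not the patch code; __cpuid and the signature_* constants come from
cpuid.h, with the fallbacks added to include/qemu/cpuid.h above). Note that in
the patch the result is additionally gated on have_avx1, since the Intel
guarantee only applies to AVX-capable processors.

    #include <cpuid.h>
    #include <stdbool.h>

    /* True when CPUID leaf 0 reports GenuineIntel or AuthenticAMD. */
    static bool vendor_guarantees_16byte_atomicity(void)
    {
        unsigned a, b, c, d;

        __cpuid(0, a, b, c, d);
        return c == signature_INTEL_ecx || c == signature_AMD_ecx;
    }
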
29
100
30
101
1
Rename to fold_multiply2, and handle muls2_i32, mulu2_i64,
1
Notice when the host has additional atomic instructions.
2
and muls2_i64.
2
The new variables will also be used in generated code.
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 44 +++++++++++++++++++++++++++++++++++---------
8
tcg/aarch64/tcg-target.h | 3 +++
9
1 file changed, 35 insertions(+), 9 deletions(-)
9
tcg/aarch64/tcg-target.c.inc | 12 ++++++++++++
10
2 files changed, 15 insertions(+)
10
11
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
--- a/tcg/aarch64/tcg-target.h
14
+++ b/tcg/optimize.c
15
+++ b/tcg/aarch64/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ typedef enum {
16
return false;
17
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_EVEN
18
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL
19
20
+extern bool have_lse;
21
+extern bool have_lse2;
22
+
23
/* optional instructions */
24
#define TCG_TARGET_HAS_div_i32 1
25
#define TCG_TARGET_HAS_rem_i32 1
26
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
27
index XXXXXXX..XXXXXXX 100644
28
--- a/tcg/aarch64/tcg-target.c.inc
29
+++ b/tcg/aarch64/tcg-target.c.inc
30
@@ -XXX,XX +XXX,XX @@
31
#include "../tcg-ldst.c.inc"
32
#include "../tcg-pool.c.inc"
33
#include "qemu/bitops.h"
34
+#ifdef __linux__
35
+#include <asm/hwcap.h>
36
+#endif
37
38
/* We're going to re-use TCGType in setting of the SF bit, which controls
39
the size of the operation performed. If we know the values match, it
40
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
41
return TCG_REG_X0 + slot;
17
}
42
}
18
43
19
-static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
44
+bool have_lse;
20
+static bool fold_multiply2(OptContext *ctx, TCGOp *op)
45
+bool have_lse2;
46
+
47
#define TCG_REG_TMP TCG_REG_X30
48
#define TCG_VEC_TMP TCG_REG_V31
49
50
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
51
52
static void tcg_target_init(TCGContext *s)
21
{
53
{
22
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
54
+#ifdef __linux__
23
- uint32_t a = arg_info(op->args[2])->val;
55
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
24
- uint32_t b = arg_info(op->args[3])->val;
56
+ have_lse = hwcap & HWCAP_ATOMICS;
25
- uint64_t r = (uint64_t)a * b;
57
+ have_lse2 = hwcap & HWCAP_USCAT;
26
+ uint64_t a = arg_info(op->args[2])->val;
58
+#endif
27
+ uint64_t b = arg_info(op->args[3])->val;
28
+ uint64_t h, l;
29
TCGArg rl, rh;
30
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
31
+ TCGOp *op2;
32
+
59
+
33
+ switch (op->opc) {
60
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
34
+ case INDEX_op_mulu2_i32:
61
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
35
+ l = (uint64_t)(uint32_t)a * (uint32_t)b;
62
tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
36
+ h = (int32_t)(l >> 32);
37
+ l = (int32_t)l;
38
+ break;
39
+ case INDEX_op_muls2_i32:
40
+ l = (int64_t)(int32_t)a * (int32_t)b;
41
+ h = l >> 32;
42
+ l = (int32_t)l;
43
+ break;
44
+ case INDEX_op_mulu2_i64:
45
+ mulu64(&l, &h, a, b);
46
+ break;
47
+ case INDEX_op_muls2_i64:
48
+ muls64(&l, &h, a, b);
49
+ break;
50
+ default:
51
+ g_assert_not_reached();
52
+ }
53
54
rl = op->args[0];
55
rh = op->args[1];
56
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
57
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
58
+
59
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
60
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
61
+
62
+ tcg_opt_gen_movi(ctx, op, rl, l);
63
+ tcg_opt_gen_movi(ctx, op2, rh, h);
64
return true;
65
}
66
return false;
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
CASE_OP_32_64(muluh):
69
done = fold_mul_highpart(&ctx, op);
70
break;
71
- case INDEX_op_mulu2_i32:
72
- done = fold_mulu2_i32(&ctx, op);
73
+ CASE_OP_32_64(muls2):
74
+ CASE_OP_32_64(mulu2):
75
+ done = fold_multiply2(&ctx, op);
76
break;
77
CASE_OP_32_64(nand):
78
done = fold_nand(&ctx, op);
79
--
63
--
80
2.25.1
64
2.34.1
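
The Linux detection boils down to two hwcap bits; a minimal standalone
version (assumed, using plain getauxval rather than QEMU's qemu_getauxval
wrapper) is shown below. HWCAP_ATOMICS advertises FEAT_LSE (LDADD, CAS, SWP
and friends) and HWCAP_USCAT advertises FEAT_LSE2, the single-copy atomicity
of unaligned accesses within a 16-byte granule that the atomicity work in
this series can take advantage of.

    #include <stdbool.h>
    #include <sys/auxv.h>
    #include <asm/hwcap.h>   /* HWCAP_ATOMICS, HWCAP_USCAT (Linux-only) */

    static void detect_aarch64_atomics(bool *lse, bool *lse2)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);

        *lse  = hwcap & HWCAP_ATOMICS;   /* FEAT_LSE  */
        *lse2 = hwcap & HWCAP_USCAT;     /* FEAT_LSE2 */
    }
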
81
65
82
66
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
These features are present for Apple M1.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
3
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
7
---
5
tcg/optimize.c | 56 ++++++++++++++++++++++++++++----------------------
8
tcg/aarch64/tcg-target.c.inc | 28 ++++++++++++++++++++++++++++
6
1 file changed, 31 insertions(+), 25 deletions(-)
9
1 file changed, 28 insertions(+)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/tcg/aarch64/tcg-target.c.inc
11
+++ b/tcg/optimize.c
14
+++ b/tcg/aarch64/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@
13
return true;
16
#ifdef __linux__
17
#include <asm/hwcap.h>
18
#endif
19
+#ifdef CONFIG_DARWIN
20
+#include <sys/sysctl.h>
21
+#endif
22
23
/* We're going to re-use TCGType in setting of the SF bit, which controls
24
the size of the operation performed. If we know the values match, it
25
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
26
}
14
}
27
}
15
28
16
+static bool fold_movcond(OptContext *ctx, TCGOp *op)
29
+#ifdef CONFIG_DARWIN
30
+static bool sysctl_for_bool(const char *name)
17
+{
31
+{
18
+ TCGOpcode opc = op->opc;
32
+ int val = 0;
19
+ TCGCond cond = op->args[5];
33
+ size_t len = sizeof(val);
20
+ int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
21
+
34
+
22
+ if (i >= 0) {
35
+ if (sysctlbyname(name, &val, &len, NULL, 0) == 0) {
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
36
+ return val != 0;
24
+ }
37
+ }
25
+
38
+
26
+ if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
39
+ /*
27
+ uint64_t tv = arg_info(op->args[3])->val;
40
+ * We might in the future ask for properties not present in older kernels,
28
+ uint64_t fv = arg_info(op->args[4])->val;
41
+ * but we're only asking about static properties, all of which should be
29
+
42
+ * 'int'. So we shouldn't see ENOMEM (val too small), or any of the other
30
+ opc = (opc == INDEX_op_movcond_i32
43
+ * more exotic errors.
31
+ ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
44
+ */
32
+
45
+ assert(errno == ENOENT);
33
+ if (tv == 1 && fv == 0) {
34
+ op->opc = opc;
35
+ op->args[3] = cond;
36
+ } else if (fv == 1 && tv == 0) {
37
+ op->opc = opc;
38
+ op->args[3] = tcg_invert_cond(cond);
39
+ }
40
+ }
41
+ return false;
46
+ return false;
42
+}
47
+}
48
+#endif
43
+
49
+
44
static bool fold_mul(OptContext *ctx, TCGOp *op)
50
static void tcg_target_init(TCGContext *s)
45
{
51
{
46
return fold_const2(ctx, op);
52
#ifdef __linux__
47
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
53
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
48
}
54
have_lse = hwcap & HWCAP_ATOMICS;
49
break;
55
have_lse2 = hwcap & HWCAP_USCAT;
50
56
#endif
51
- CASE_OP_32_64(movcond):
57
+#ifdef CONFIG_DARWIN
52
- i = do_constant_folding_cond(opc, op->args[1],
58
+ have_lse = sysctl_for_bool("hw.optional.arm.FEAT_LSE");
53
- op->args[2], op->args[5]);
59
+ have_lse2 = sysctl_for_bool("hw.optional.arm.FEAT_LSE2");
54
- if (i >= 0) {
60
+#endif
55
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
61
56
- continue;
62
tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffffu;
57
- }
63
tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffffu;
58
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
59
- uint64_t tv = arg_info(op->args[3])->val;
60
- uint64_t fv = arg_info(op->args[4])->val;
61
- TCGCond cond = op->args[5];
62
-
63
- if (fv == 1 && tv == 0) {
64
- cond = tcg_invert_cond(cond);
65
- } else if (!(tv == 1 && fv == 0)) {
66
- break;
67
- }
68
- op->args[3] = cond;
69
- op->opc = opc = (opc == INDEX_op_movcond_i32
70
- ? INDEX_op_setcond_i32
71
- : INDEX_op_setcond_i64);
72
- }
73
- break;
74
-
75
-
76
default:
77
break;
78
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
case INDEX_op_mb:
81
done = fold_mb(&ctx, op);
82
break;
83
+ CASE_OP_32_64(movcond):
84
+ done = fold_movcond(&ctx, op);
85
+ break;
86
CASE_OP_32_64(mul):
87
done = fold_mul(&ctx, op);
88
break;
89
--
64
--
90
2.25.1
65
2.34.1
91
66
92
67
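As a standalone illustration (not part of the patch), the same
hw.optional.arm.* properties can be read with a few lines of C on an Apple
Silicon macOS host; sysctlbyname() is the documented interface, and an absent
key is simply treated here as "feature not present".

    /* Standalone sketch of the Darwin detection above (arm64 macOS). */
    #include <stdbool.h>
    #include <stdio.h>
    #include <sys/sysctl.h>     /* sysctlbyname() */

    static bool sysctl_bool(const char *name)
    {
        int val = 0;
        size_t len = sizeof(val);

        /* A missing key returns -1/ENOENT; treat it as "not supported". */
        if (sysctlbyname(name, &val, &len, NULL, 0) != 0) {
            return false;
        }
        return val != 0;
    }

    int main(void)
    {
        printf("FEAT_LSE  = %d\n", sysctl_bool("hw.optional.arm.FEAT_LSE"));
        printf("FEAT_LSE2 = %d\n", sysctl_bool("hw.optional.arm.FEAT_LSE2"));
        return 0;
    }
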
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 52 +++------------------------------------
9
1 file changed, 4 insertions(+), 48 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
int seg;
17
} HostAddress;
18
19
-#if defined(CONFIG_SOFTMMU)
20
/*
21
* Because i686 has no register parameters and because x86_64 has xchg
22
* to handle addr/data register overlap, we have placed all input arguments
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
25
/* resolve label address */
26
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
27
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
28
+ if (label_ptr[1]) {
29
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
30
}
31
32
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
33
34
/* resolve label address */
35
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
36
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
37
+ if (label_ptr[1]) {
38
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
39
}
40
41
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
42
tcg_out_jmp(s, l->raddr);
43
return true;
44
}
45
-#else
46
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
47
-{
48
- /* resolve label address */
49
- tcg_patch32(l->label_ptr[0], s->code_ptr - l->label_ptr[0] - 4);
50
-
51
- if (TCG_TARGET_REG_BITS == 32) {
52
- int ofs = 0;
53
-
54
- tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
55
- ofs += 4;
56
-
57
- tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
58
- ofs += 4;
59
- if (TARGET_LONG_BITS == 64) {
60
- tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
61
- ofs += 4;
62
- }
63
-
64
- tcg_out_pushi(s, (uintptr_t)l->raddr);
65
- } else {
66
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
67
- l->addrlo_reg);
68
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
69
-
70
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RAX, (uintptr_t)l->raddr);
71
- tcg_out_push(s, TCG_REG_RAX);
72
- }
73
-
74
- /* "Tail call" to the helper, with the return address back inline. */
75
- tcg_out_jmp(s, (const void *)(l->is_ld ? helper_unaligned_ld
76
- : helper_unaligned_st));
77
- return true;
78
-}
79
-
80
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
81
-{
82
- return tcg_out_fail_alignment(s, l);
83
-}
84
-
85
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
86
-{
87
- return tcg_out_fail_alignment(s, l);
88
-}
89
90
+#ifndef CONFIG_SOFTMMU
91
static HostAddress x86_guest_base = {
92
.index = -1
93
};
94
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
95
return 0;
96
}
97
#endif /* setup_guest_base_seg */
98
-#endif /* SOFTMMU */
99
+#endif /* !SOFTMMU */
100
101
/*
102
* For softmmu, perform the TLB load and compare.
103
--
104
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/aarch64/tcg-target.c.inc | 35 -----------------------------------
9
1 file changed, 35 deletions(-)
10
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
TCGType index_ext;
17
} HostAddress;
18
19
-#ifdef CONFIG_SOFTMMU
20
static const TCGLdstHelperParam ldst_helper_param = {
21
.ntmp = 1, .tmp = { TCG_REG_TMP }
22
};
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tcg_out_goto(s, lb->raddr);
25
return true;
26
}
27
-#else
28
-static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
29
-{
30
- ptrdiff_t offset = tcg_pcrel_diff(s, target);
31
- tcg_debug_assert(offset == sextract64(offset, 0, 21));
32
- tcg_out_insn(s, 3406, ADR, rd, offset);
33
-}
34
-
35
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
36
-{
37
- if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
38
- return false;
39
- }
40
-
41
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, l->addrlo_reg);
42
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
43
-
44
- /* "Tail call" to the helper, with the return address back inline. */
45
- tcg_out_adr(s, TCG_REG_LR, l->raddr);
46
- tcg_out_goto_long(s, (const void *)(l->is_ld ? helper_unaligned_ld
47
- : helper_unaligned_st));
48
- return true;
49
-}
50
-
51
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
52
-{
53
- return tcg_out_fail_alignment(s, l);
54
-}
55
-
56
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
57
-{
58
- return tcg_out_fail_alignment(s, l);
59
-}
60
-#endif /* CONFIG_SOFTMMU */
61
62
/*
63
* For softmmu, perform the TLB load and compare.
64
--
65
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/ppc/tcg-target.c.inc | 44 ----------------------------------------
9
1 file changed, 44 deletions(-)
10
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/ppc/tcg-target.c.inc
14
+++ b/tcg/ppc/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static const uint32_t qemu_stx_opc[(MO_SIZE + MO_BSWAP) + 1] = {
16
[MO_BSWAP | MO_UQ] = STDBRX,
17
};
18
19
-#if defined (CONFIG_SOFTMMU)
20
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
21
{
22
if (arg < 0) {
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tcg_out_b(s, 0, lb->raddr);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
31
- return false;
32
- }
33
-
34
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
35
- TCGReg arg = TCG_REG_R4;
36
-
37
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
38
- if (l->addrlo_reg != arg) {
39
- tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
40
- tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
41
- } else if (l->addrhi_reg != arg + 1) {
42
- tcg_out_mov(s, TCG_TYPE_I32, arg + 1, l->addrlo_reg);
43
- tcg_out_mov(s, TCG_TYPE_I32, arg, l->addrhi_reg);
44
- } else {
45
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R0, arg);
46
- tcg_out_mov(s, TCG_TYPE_I32, arg, arg + 1);
47
- tcg_out_mov(s, TCG_TYPE_I32, arg + 1, TCG_REG_R0);
48
- }
49
- } else {
50
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R4, l->addrlo_reg);
51
- }
52
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, TCG_AREG0);
53
-
54
- /* "Tail call" to the helper, with the return address back inline. */
55
- tcg_out_call_int(s, 0, (const void *)(l->is_ld ? helper_unaligned_ld
56
- : helper_unaligned_st));
57
- return true;
58
-}
59
-
60
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
61
-{
62
- return tcg_out_fail_alignment(s, l);
63
-}
64
-
65
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
66
-{
67
- return tcg_out_fail_alignment(s, l);
68
-}
69
-#endif /* SOFTMMU */
70
71
typedef struct {
72
TCGReg base;
73
--
74
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/loongarch64/tcg-target.c.inc | 30 ------------------------------
9
1 file changed, 30 deletions(-)
10
11
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/loongarch64/tcg-target.c.inc
14
+++ b/tcg/loongarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
16
* Load/store helpers for SoftMMU, and qemu_ld/st implementations
17
*/
18
19
-#if defined(CONFIG_SOFTMMU)
20
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
21
{
22
tcg_out_opc_b(s, 0);
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
25
return tcg_out_goto(s, l->raddr);
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- /* resolve label address */
31
- if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
32
- return false;
33
- }
34
-
35
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
37
-
38
- /* tail call, with the return address back inline. */
39
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr);
40
- tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld
41
- : helper_unaligned_st), true);
42
- return true;
43
-}
44
-
45
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
-{
47
- return tcg_out_fail_alignment(s, l);
48
-}
49
-
50
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
-{
52
- return tcg_out_fail_alignment(s, l);
53
-}
54
-
55
-#endif /* CONFIG_SOFTMMU */
56
57
typedef struct {
58
TCGReg base;
59
--
60
2.34.1
New patch
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/riscv/tcg-target.c.inc | 29 -----------------------------
9
1 file changed, 29 deletions(-)
10
11
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/riscv/tcg-target.c.inc
14
+++ b/tcg/riscv/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
16
* Load/store and TLB
17
*/
18
19
-#if defined(CONFIG_SOFTMMU)
20
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
21
{
22
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
tcg_out_goto(s, l->raddr);
25
return true;
26
}
27
-#else
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
29
-{
30
- /* resolve label address */
31
- if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
32
- return false;
33
- }
34
-
35
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
37
-
38
- /* tail call, with the return address back inline. */
39
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (uintptr_t)l->raddr);
40
- tcg_out_call_int(s, (const void *)(l->is_ld ? helper_unaligned_ld
41
- : helper_unaligned_st), true);
42
- return true;
43
-}
44
-
45
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
46
-{
47
- return tcg_out_fail_alignment(s, l);
48
-}
49
-
50
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
-{
52
- return tcg_out_fail_alignment(s, l);
53
-}
54
-#endif /* CONFIG_SOFTMMU */
55
56
/*
57
* For softmmu, perform the TLB load and compare.
58
--
59
2.34.1
New patch
1
Always reserve r3 for tlb softmmu lookup. Fix a bug in user-only
2
ALL_QLDST_REGS, in that r14 is clobbered by the BLNE that leads
3
to the misaligned trap. Remove r0+r1 from user-only ALL_QLDST_REGS;
4
I believe these had been reserved for bswap, which we no longer
5
perform during qemu_st.
1
6
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
tcg/arm/tcg-target-con-set.h | 16 ++++++++--------
11
tcg/arm/tcg-target-con-str.h | 5 ++---
12
tcg/arm/tcg-target.c.inc | 23 ++++++++---------------
13
3 files changed, 18 insertions(+), 26 deletions(-)
14
15
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/arm/tcg-target-con-set.h
18
+++ b/tcg/arm/tcg-target-con-set.h
19
@@ -XXX,XX +XXX,XX @@
20
C_O0_I1(r)
21
C_O0_I2(r, r)
22
C_O0_I2(r, rIN)
23
-C_O0_I2(s, s)
24
+C_O0_I2(q, q)
25
C_O0_I2(w, r)
26
-C_O0_I3(s, s, s)
27
-C_O0_I3(S, p, s)
28
+C_O0_I3(q, q, q)
29
+C_O0_I3(Q, p, q)
30
C_O0_I4(r, r, rI, rI)
31
-C_O0_I4(S, p, s, s)
32
-C_O1_I1(r, l)
33
+C_O0_I4(Q, p, q, q)
34
+C_O1_I1(r, q)
35
C_O1_I1(r, r)
36
C_O1_I1(w, r)
37
C_O1_I1(w, w)
38
C_O1_I1(w, wr)
39
C_O1_I2(r, 0, rZ)
40
-C_O1_I2(r, l, l)
41
+C_O1_I2(r, q, q)
42
C_O1_I2(r, r, r)
43
C_O1_I2(r, r, rI)
44
C_O1_I2(r, r, rIK)
45
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wZ)
46
C_O1_I3(w, w, w, w)
47
C_O1_I4(r, r, r, rI, rI)
48
C_O1_I4(r, r, rIN, rIK, 0)
49
-C_O2_I1(e, p, l)
50
-C_O2_I2(e, p, l, l)
51
+C_O2_I1(e, p, q)
52
+C_O2_I2(e, p, q, q)
53
C_O2_I2(r, r, r, r)
54
C_O2_I4(r, r, r, r, rIN, rIK)
55
C_O2_I4(r, r, rI, rI, rIN, rIK)
56
diff --git a/tcg/arm/tcg-target-con-str.h b/tcg/arm/tcg-target-con-str.h
57
index XXXXXXX..XXXXXXX 100644
58
--- a/tcg/arm/tcg-target-con-str.h
59
+++ b/tcg/arm/tcg-target-con-str.h
60
@@ -XXX,XX +XXX,XX @@
61
*/
62
REGS('e', ALL_GENERAL_REGS & 0x5555) /* even regs */
63
REGS('r', ALL_GENERAL_REGS)
64
-REGS('l', ALL_QLOAD_REGS)
65
-REGS('s', ALL_QSTORE_REGS)
66
-REGS('S', ALL_QSTORE_REGS & 0x5555) /* even qstore */
67
+REGS('q', ALL_QLDST_REGS)
68
+REGS('Q', ALL_QLDST_REGS & 0x5555) /* even qldst */
69
REGS('w', ALL_VECTOR_REGS)
70
71
/*
72
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
73
index XXXXXXX..XXXXXXX 100644
74
--- a/tcg/arm/tcg-target.c.inc
75
+++ b/tcg/arm/tcg-target.c.inc
76
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
77
#define ALL_VECTOR_REGS 0xffff0000u
78
79
/*
80
- * r0-r2 will be overwritten when reading the tlb entry (softmmu only)
81
- * and r0-r1 doing the byte swapping, so don't use these.
82
- * r3 is removed for softmmu to avoid clashes with helper arguments.
83
+ * r0-r3 will be overwritten when reading the tlb entry (softmmu only);
84
+ * r14 will be overwritten by the BLNE branching to the slow path.
85
*/
86
#ifdef CONFIG_SOFTMMU
87
-#define ALL_QLOAD_REGS \
88
+#define ALL_QLDST_REGS \
89
(ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
90
(1 << TCG_REG_R2) | (1 << TCG_REG_R3) | \
91
(1 << TCG_REG_R14)))
92
-#define ALL_QSTORE_REGS \
93
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1) | \
94
- (1 << TCG_REG_R2) | (1 << TCG_REG_R14) | \
95
- ((TARGET_LONG_BITS == 64) << TCG_REG_R3)))
96
#else
97
-#define ALL_QLOAD_REGS ALL_GENERAL_REGS
98
-#define ALL_QSTORE_REGS \
99
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R0) | (1 << TCG_REG_R1)))
100
+#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R14))
101
#endif
102
103
/*
104
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
105
return C_O1_I4(r, r, r, rI, rI);
106
107
case INDEX_op_qemu_ld_i32:
108
- return TARGET_LONG_BITS == 32 ? C_O1_I1(r, l) : C_O1_I2(r, l, l);
109
+ return TARGET_LONG_BITS == 32 ? C_O1_I1(r, q) : C_O1_I2(r, q, q);
110
case INDEX_op_qemu_ld_i64:
111
- return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, l) : C_O2_I2(e, p, l, l);
112
+ return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, q) : C_O2_I2(e, p, q, q);
113
case INDEX_op_qemu_st_i32:
114
- return TARGET_LONG_BITS == 32 ? C_O0_I2(s, s) : C_O0_I3(s, s, s);
115
+ return TARGET_LONG_BITS == 32 ? C_O0_I2(q, q) : C_O0_I3(q, q, q);
116
case INDEX_op_qemu_st_i64:
117
- return TARGET_LONG_BITS == 32 ? C_O0_I3(S, p, s) : C_O0_I4(S, p, s, s);
118
+ return TARGET_LONG_BITS == 32 ? C_O0_I3(Q, p, q) : C_O0_I4(Q, p, q, q);
119
120
case INDEX_op_st_vec:
121
return C_O0_I2(w, r);
122
--
123
2.34.1
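A small aside on the 0x5555 masks used for the 'Q' and 'e' constraints above:
in these register-set macros each bit position corresponds to one register
number, so ANDing with 0x5555 (binary 0101...0101) keeps only the
even-numbered registers. A purely illustrative sketch:

    /* Illustration of why "& 0x5555" means "even registers only". */
    #include <stdio.h>

    int main(void)
    {
        unsigned mask = 0x5555;             /* bits 0, 2, 4, ..., 14 set */

        for (int r = 0; r < 16; r++) {
            if (mask & (1u << r)) {
                printf("r%d ", r);          /* prints r0 r2 r4 ... r14 */
            }
        }
        printf("\n");
        return 0;
    }
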
1
This is the final entry in the main switch that was in a
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
different form. After this, we have the option to convert
2
This will allow the fast path to increase alignment to implement atomicity
3
the switch into a function dispatch table.
3
while not immediately raising an alignment exception.
4
4
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 27 ++++++++++++++-------------
8
tcg/arm/tcg-target.c.inc | 45 ----------------------------------------
10
1 file changed, 14 insertions(+), 13 deletions(-)
9
1 file changed, 45 deletions(-)
11
10
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
13
--- a/tcg/arm/tcg-target.c.inc
15
+++ b/tcg/optimize.c
14
+++ b/tcg/arm/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
bool index_scratch;
17
} HostAddress;
18
19
-#ifdef CONFIG_SOFTMMU
20
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
21
{
22
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
24
tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
17
return true;
25
return true;
18
}
26
}
19
27
-#else
20
+static bool fold_mov(OptContext *ctx, TCGOp *op)
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
21
+{
29
-{
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
30
- if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
23
+}
31
- return false;
24
+
32
- }
25
static bool fold_movcond(OptContext *ctx, TCGOp *op)
26
{
27
TCGOpcode opc = op->opc;
28
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
29
break;
30
}
31
32
- /* Propagate constants through copy operations and do constant
33
- folding. Constants will be substituted to arguments by register
34
- allocator where needed and possible. Also detect copies. */
35
+ /*
36
+ * Process each opcode.
37
+ * Sorted alphabetically by opcode as much as possible.
38
+ */
39
switch (opc) {
40
- CASE_OP_32_64_VEC(mov):
41
- done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
42
- break;
43
-
33
-
44
- default:
34
- if (TARGET_LONG_BITS == 64) {
45
- break;
35
- /* 64-bit target address is aligned into R2:R3. */
36
- TCGMovExtend ext[2] = {
37
- { .dst = TCG_REG_R2, .dst_type = TCG_TYPE_I32,
38
- .src = l->addrlo_reg,
39
- .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
40
- { .dst = TCG_REG_R3, .dst_type = TCG_TYPE_I32,
41
- .src = l->addrhi_reg,
42
- .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
43
- };
44
- tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
45
- } else {
46
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R1, l->addrlo_reg);
47
- }
48
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_AREG0);
46
-
49
-
47
- /* ---------------------------------------------------------- */
50
- /*
48
- /* Sorted alphabetically by opcode as much as possible. */
51
- * Tail call to the helper, with the return address back inline,
52
- * just for the clarity of the debugging traceback -- the helper
53
- * cannot return. We have used BLNE to arrive here, so LR is
54
- * already set.
55
- */
56
- tcg_out_goto(s, COND_AL, (const void *)
57
- (l->is_ld ? helper_unaligned_ld : helper_unaligned_st));
58
- return true;
59
-}
49
-
60
-
50
CASE_OP_32_64_VEC(add):
61
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
done = fold_add(&ctx, op);
62
-{
52
break;
63
- return tcg_out_fail_alignment(s, l);
53
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
64
-}
54
case INDEX_op_mb:
65
-
55
done = fold_mb(&ctx, op);
66
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
56
break;
67
-{
57
+ CASE_OP_32_64_VEC(mov):
68
- return tcg_out_fail_alignment(s, l);
58
+ done = fold_mov(&ctx, op);
69
-}
59
+ break;
70
-#endif /* SOFTMMU */
60
CASE_OP_32_64(movcond):
71
61
done = fold_movcond(&ctx, op);
72
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
62
break;
73
TCGReg addrlo, TCGReg addrhi,
63
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
64
CASE_OP_32_64_VEC(xor):
65
done = fold_xor(&ctx, op);
66
break;
67
+ default:
68
+ break;
69
}
70
71
if (!done) {
72
--
74
--
73
2.25.1
75
2.34.1
74
75
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
7
---
5
tcg/optimize.c | 32 ++++++++++++++++++--------------
8
tcg/mips/tcg-target.c.inc | 57 ++-------------------------------------
6
1 file changed, 18 insertions(+), 14 deletions(-)
9
1 file changed, 2 insertions(+), 55 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/tcg/mips/tcg-target.c.inc
11
+++ b/tcg/optimize.c
14
+++ b/tcg/mips/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
16
tcg_out_nop(s);
17
}
18
19
-#if defined(CONFIG_SOFTMMU)
20
/* We have four temps, we might as well expose three of them. */
21
static const TCGLdstHelperParam ldst_helper_param = {
22
.ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
24
25
/* resolve label address */
26
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
27
- || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
28
- && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
29
+ || (l->label_ptr[1] && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
30
return false;
31
}
32
33
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
34
35
/* resolve label address */
36
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
37
- || (TCG_TARGET_REG_BITS < TARGET_LONG_BITS
38
- && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
39
+ || (l->label_ptr[1] && !reloc_pc16(l->label_ptr[1], tgt_rx))) {
40
return false;
41
}
42
43
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
13
return true;
44
return true;
14
}
45
}
15
46
16
+static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
47
-#else
17
+{
48
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
18
+ if (arg_is_const(op->args[1])) {
49
-{
19
+ uint64_t t = arg_info(op->args[1])->val;
50
- void *target;
20
+
21
+ if (t != 0) {
22
+ t = do_constant_folding(op->opc, t, 0);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
24
+ }
25
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
26
+ }
27
+ return false;
28
+}
29
+
30
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
31
{
32
return fold_const1(ctx, op);
33
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
34
}
35
break;
36
37
- CASE_OP_32_64(clz):
38
- CASE_OP_32_64(ctz):
39
- if (arg_is_const(op->args[1])) {
40
- TCGArg v = arg_info(op->args[1])->val;
41
- if (v != 0) {
42
- tmp = do_constant_folding(opc, v, 0);
43
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
44
- } else {
45
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
46
- }
47
- continue;
48
- }
49
- break;
50
-
51
-
51
default:
52
- if (!reloc_pc16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
52
break;
53
- return false;
53
54
- }
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
55
-
55
case INDEX_op_brcond2_i32:
56
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
56
done = fold_brcond2(&ctx, op);
57
- /* A0 is env, A1 is skipped, A2:A3 is the uint64_t address. */
57
break;
58
- TCGReg a2 = MIPS_BE ? l->addrhi_reg : l->addrlo_reg;
58
+ CASE_OP_32_64(clz):
59
- TCGReg a3 = MIPS_BE ? l->addrlo_reg : l->addrhi_reg;
59
+ CASE_OP_32_64(ctz):
60
-
60
+ done = fold_count_zeros(&ctx, op);
61
- if (a3 != TCG_REG_A2) {
61
+ break;
62
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2);
62
CASE_OP_32_64(ctpop):
63
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3);
63
done = fold_ctpop(&ctx, op);
64
- } else if (a2 != TCG_REG_A3) {
64
break;
65
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, a3);
66
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, a2);
67
- } else {
68
- tcg_out_mov(s, TCG_TYPE_I32, TCG_TMP0, TCG_REG_A2);
69
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A2, TCG_REG_A3);
70
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_A3, TCG_TMP0);
71
- }
72
- } else {
73
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_A1, l->addrlo_reg);
74
- }
75
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
76
-
77
- /*
78
- * Tail call to the helper, with the return address back inline.
79
- * We have arrived here via BNEL, so $31 is already set.
80
- */
81
- target = (l->is_ld ? helper_unaligned_ld : helper_unaligned_st);
82
- tcg_out_call_int(s, target, true);
83
- return true;
84
-}
85
-
86
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
87
-{
88
- return tcg_out_fail_alignment(s, l);
89
-}
90
-
91
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
92
-{
93
- return tcg_out_fail_alignment(s, l);
94
-}
95
-#endif /* SOFTMMU */
96
-
97
typedef struct {
98
TCGReg base;
99
MemOp align;
65
--
100
--
66
2.25.1
101
2.34.1
67
68
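For context on the clz/ctz folding shown above: TCG's clz/ctz opcodes carry an
explicit second operand that supplies the result when the input is zero, which
is why a constant zero input folds to a move of args[2] rather than to a fixed
constant. A standalone sketch of that semantic (not QEMU code):

    /* Sketch of clz-with-fallback, mirroring the folding logic above. */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t clz64_or(uint64_t x, uint64_t zero_result)
    {
        /* __builtin_clzll() is undefined for 0, hence the explicit check. */
        return x ? (uint64_t)__builtin_clzll(x) : zero_result;
    }

    int main(void)
    {
        printf("%llu\n", (unsigned long long)clz64_or(0x10, 64));  /* 59 */
        printf("%llu\n", (unsigned long long)clz64_or(0, 64));     /* 64 */
        return 0;
    }
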
1
From: Luis Pires <luis.pires@eldorado.org.br>
1
Instead of using helper_unaligned_{ld,st}, use the full load/store helpers.
2
This will allow the fast path to increase alignment to implement atomicity
3
while not immediately raising an alignment exception.
2
4
3
These will be used to implement new decimal floating point
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
instructions from Power ISA 3.1.
5
6
The remainder is now returned directly by divu128/divs128,
7
freeing up phigh to receive the high 64 bits of the quotient.
8
9
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
10
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
11
Message-Id: <20211025191154.350831-4-luis.pires@eldorado.org.br>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
7
---
14
include/hw/clock.h | 6 +-
8
tcg/s390x/tcg-target.c.inc | 29 -----------------------------
15
include/qemu/host-utils.h | 20 ++++--
9
1 file changed, 29 deletions(-)
16
target/ppc/int_helper.c | 9 +--
17
util/host-utils.c | 133 +++++++++++++++++++++++++-------------
18
4 files changed, 108 insertions(+), 60 deletions(-)
19
10
20
diff --git a/include/hw/clock.h b/include/hw/clock.h
11
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
21
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/clock.h
13
--- a/tcg/s390x/tcg-target.c.inc
23
+++ b/include/hw/clock.h
14
+++ b/tcg/s390x/tcg-target.c.inc
24
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
25
if (clk->period == 0) {
26
return 0;
27
}
28
- /*
29
- * BUG: when CONFIG_INT128 is not defined, the current implementation of
30
- * divu128 does not return a valid truncated quotient, so the result will
31
- * be wrong.
32
- */
33
+
34
divu128(&lo, &hi, clk->period);
35
return lo;
36
}
37
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
38
index XXXXXXX..XXXXXXX 100644
39
--- a/include/qemu/host-utils.h
40
+++ b/include/qemu/host-utils.h
41
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
42
return (__int128_t)a * b / c;
43
}
44
45
-static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
46
+static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
47
+ uint64_t divisor)
48
{
49
__uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
50
__uint128_t result = dividend / divisor;
51
+
52
*plow = result;
53
- *phigh = dividend % divisor;
54
+ *phigh = result >> 64;
55
+ return dividend % divisor;
56
}
57
58
-static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
59
+static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
60
+ int64_t divisor)
61
{
62
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
63
+ __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
64
__int128_t result = dividend / divisor;
65
+
66
*plow = result;
67
- *phigh = dividend % divisor;
68
+ *phigh = result >> 64;
69
+ return dividend % divisor;
70
}
71
#else
72
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
73
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
74
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
75
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
76
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
77
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
78
79
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
80
{
81
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
82
index XXXXXXX..XXXXXXX 100644
83
--- a/target/ppc/int_helper.c
84
+++ b/target/ppc/int_helper.c
85
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
86
87
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
88
{
89
- int64_t rt = 0;
90
+ uint64_t rt = 0;
91
int64_t ra = (int64_t)rau;
92
int64_t rb = (int64_t)rbu;
93
int overflow = 0;
94
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
95
int cr;
96
uint64_t lo_value;
97
uint64_t hi_value;
98
+ uint64_t rem;
99
ppc_avr_t ret = { .u64 = { 0, 0 } };
100
101
if (b->VsrSD(0) < 0) {
102
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
103
* In that case, we leave r unchanged.
104
*/
105
} else {
106
- divu128(&lo_value, &hi_value, 1000000000000000ULL);
107
+ rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
108
109
- for (i = 1; i < 16; hi_value /= 10, i++) {
110
- bcd_put_digit(&ret, hi_value % 10, i);
111
+ for (i = 1; i < 16; rem /= 10, i++) {
112
+ bcd_put_digit(&ret, rem % 10, i);
113
}
114
115
for (; i < 32; lo_value /= 10, i++) {
116
diff --git a/util/host-utils.c b/util/host-utils.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/util/host-utils.c
119
+++ b/util/host-utils.c
120
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
121
}
122
123
/*
124
- * Unsigned 128-by-64 division. Returns quotient via plow and
125
- * remainder via phigh.
126
- * The result must fit in 64 bits (plow) - otherwise, the result
127
- * is undefined.
128
- * This function will cause a division by zero if passed a zero divisor.
129
+ * Unsigned 128-by-64 division.
130
+ * Returns the remainder.
131
+ * Returns quotient via plow and phigh.
132
+ * Also returns the remainder via the function return value.
133
*/
134
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
135
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
136
{
137
uint64_t dhi = *phigh;
138
uint64_t dlo = *plow;
139
- unsigned i;
140
- uint64_t carry = 0;
141
+ uint64_t rem, dhighest;
142
+ int sh;
143
144
if (divisor == 0 || dhi == 0) {
145
*plow = dlo / divisor;
146
- *phigh = dlo % divisor;
147
+ *phigh = 0;
148
+ return dlo % divisor;
149
} else {
150
+ sh = clz64(divisor);
151
152
- for (i = 0; i < 64; i++) {
153
- carry = dhi >> 63;
154
- dhi = (dhi << 1) | (dlo >> 63);
155
- if (carry || (dhi >= divisor)) {
156
- dhi -= divisor;
157
- carry = 1;
158
- } else {
159
- carry = 0;
160
+ if (dhi < divisor) {
161
+ if (sh != 0) {
162
+ /* normalize the divisor, shifting the dividend accordingly */
163
+ divisor <<= sh;
164
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
165
+ dlo <<= sh;
166
}
167
- dlo = (dlo << 1) | carry;
168
+
169
+ *phigh = 0;
170
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
171
+ } else {
172
+ if (sh != 0) {
173
+ /* normalize the divisor, shifting the dividend accordingly */
174
+ divisor <<= sh;
175
+ dhighest = dhi >> (64 - sh);
176
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
177
+ dlo <<= sh;
178
+
179
+ *phigh = udiv_qrnnd(&dhi, dhighest, dhi, divisor);
180
+ } else {
181
+ /**
182
+ * dhi >= divisor
183
+ * Since the MSB of divisor is set (sh == 0),
184
+ * (dhi - divisor) < divisor
185
+ *
186
+ * Thus, the high part of the quotient is 1, and we can
187
+ * calculate the low part with a single call to udiv_qrnnd
188
+ * after subtracting divisor from dhi
189
+ */
190
+ dhi -= divisor;
191
+ *phigh = 1;
192
+ }
193
+
194
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
195
}
196
197
- *plow = dlo;
198
- *phigh = dhi;
199
+ /*
200
+ * since the dividend/divisor might have been normalized,
201
+ * the remainder might also have to be shifted back
202
+ */
203
+ return rem >> sh;
204
}
16
}
205
}
17
}
206
18
207
/*
19
-#if defined(CONFIG_SOFTMMU)
208
- * Signed 128-by-64 division. Returns quotient via plow and
20
static const TCGLdstHelperParam ldst_helper_param = {
209
- * remainder via phigh.
21
.ntmp = 1, .tmp = { TCG_TMP0 }
210
- * The result must fit in 64 bits (plow) - otherwise, the result
22
};
211
- * is undefined.
23
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
212
- * This function will cause a division by zero if passed a zero divisor.
24
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
213
+ * Signed 128-by-64 division.
25
return true;
214
+ * Returns quotient via plow and phigh.
26
}
215
+ * Also returns the remainder via the function return value.
27
-#else
216
*/
28
-static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
217
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
29
-{
218
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor)
30
- if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
219
{
31
- (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
220
- int sgn_dvdnd = *phigh < 0;
32
- return false;
221
- int sgn_divsr = divisor < 0;
222
+ bool neg_quotient = false, neg_remainder = false;
223
+ uint64_t unsig_hi = *phigh, unsig_lo = *plow;
224
+ uint64_t rem;
225
226
- if (sgn_dvdnd) {
227
- *plow = ~(*plow);
228
- *phigh = ~(*phigh);
229
- if (*plow == (int64_t)-1) {
230
+ if (*phigh < 0) {
231
+ neg_quotient = !neg_quotient;
232
+ neg_remainder = !neg_remainder;
233
+
234
+ if (unsig_lo == 0) {
235
+ unsig_hi = -unsig_hi;
236
+ } else {
237
+ unsig_hi = ~unsig_hi;
238
+ unsig_lo = -unsig_lo;
239
+ }
240
+ }
241
+
242
+ if (divisor < 0) {
243
+ neg_quotient = !neg_quotient;
244
+
245
+ divisor = -divisor;
246
+ }
247
+
248
+ rem = divu128(&unsig_lo, &unsig_hi, (uint64_t)divisor);
249
+
250
+ if (neg_quotient) {
251
+ if (unsig_lo == 0) {
252
+ *phigh = -unsig_hi;
253
*plow = 0;
254
- (*phigh)++;
255
- } else {
256
- (*plow)++;
257
- }
258
+ } else {
259
+ *phigh = ~unsig_hi;
260
+ *plow = -unsig_lo;
261
+ }
262
+ } else {
263
+ *phigh = unsig_hi;
264
+ *plow = unsig_lo;
265
}
266
267
- if (sgn_divsr) {
268
- divisor = 0 - divisor;
269
- }
33
- }
270
-
34
-
271
- divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
35
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
272
-
37
-
273
- if (sgn_dvdnd ^ sgn_divsr) {
38
- /* "Tail call" to the helper, with the return address back inline. */
274
- *plow = 0 - *plow;
39
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
275
+ if (neg_remainder) {
40
- tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
276
+ return -rem;
41
- : helper_unaligned_st));
277
+ } else {
42
- return true;
278
+ return rem;
43
-}
279
}
44
-
280
}
45
-static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
281
#endif
46
-{
47
- return tcg_out_fail_alignment(s, l);
48
-}
49
-
50
-static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
51
-{
52
- return tcg_out_fail_alignment(s, l);
53
-}
54
-#endif /* CONFIG_SOFTMMU */
55
56
/*
57
* For softmmu, perform the TLB load and compare.
282
--
58
--
283
2.25.1
59
2.34.1
284
285
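To make the new calling convention concrete: after this patch the quotient
comes back through *plow/*phigh and the remainder is the return value. A
minimal sketch of that contract, assuming a compiler with __int128 support
(the non-__int128 path in util/host-utils.c implements the same semantics by
hand):

    /* Sketch of the new divu128() contract (quotient via pointers,
     * remainder by value); requires __int128. */
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t divu128_sketch(uint64_t *plow, uint64_t *phigh,
                                   uint64_t divisor)
    {
        __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
        __uint128_t quotient = dividend / divisor;

        *plow = (uint64_t)quotient;              /* low 64 bits of quotient */
        *phigh = (uint64_t)(quotient >> 64);     /* high 64 bits of quotient */
        return (uint64_t)(dividend % divisor);   /* remainder */
    }

    int main(void)
    {
        /* (2^64 + 3) / 2 => quotient 0x8000000000000001, remainder 1. */
        uint64_t lo = 3, hi = 1;
        uint64_t rem = divu128_sketch(&lo, &hi, 2);

        printf("hi=0x%016llx lo=0x%016llx rem=%llu\n",
               (unsigned long long)hi, (unsigned long long)lo,
               (unsigned long long)rem);
        return 0;
    }
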
1
Pull the "op r, a, i => mov r, a" optimization into a function,
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
and use them in the outer-most logical operations.
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
3
---
7
tcg/optimize.c | 61 +++++++++++++++++++++-----------------------------
4
tcg/sparc64/tcg-target.c.inc | 15 +++++++--------
8
1 file changed, 26 insertions(+), 35 deletions(-)
5
1 file changed, 7 insertions(+), 8 deletions(-)
9
6
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
7
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
9
--- a/tcg/sparc64/tcg-target.c.inc
13
+++ b/tcg/optimize.c
10
+++ b/tcg/sparc64/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
11
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
15
return false;
12
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
13
#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
14
15
-/* Define some temporary registers. T2 is used for constant generation. */
16
+/* Define some temporary registers. T3 is used for constant generation. */
17
#define TCG_REG_T1 TCG_REG_G1
18
-#define TCG_REG_T2 TCG_REG_O7
19
+#define TCG_REG_T2 TCG_REG_G2
20
+#define TCG_REG_T3 TCG_REG_O7
21
22
#ifndef CONFIG_SOFTMMU
23
# define TCG_GUEST_BASE_REG TCG_REG_I5
24
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
25
TCG_REG_I4,
26
TCG_REG_I5,
27
28
- TCG_REG_G2,
29
TCG_REG_G3,
30
TCG_REG_G4,
31
TCG_REG_G5,
32
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
33
static void tcg_out_movi(TCGContext *s, TCGType type,
34
TCGReg ret, tcg_target_long arg)
35
{
36
- tcg_debug_assert(ret != TCG_REG_T2);
37
- tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T2);
38
+ tcg_debug_assert(ret != TCG_REG_T3);
39
+ tcg_out_movi_int(s, type, ret, arg, false, TCG_REG_T3);
16
}
40
}
17
41
18
+/* If the binary operation has second argument @i, fold to identity. */
42
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs)
19
+static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
43
@@ -XXX,XX +XXX,XX @@ static void tcg_out_jmpl_const(TCGContext *s, const tcg_insn_unit *dest,
20
+{
21
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
23
+ }
24
+ return false;
25
+}
26
+
27
/* If the binary operation has second argument @i, fold to NOT. */
28
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
29
{
44
{
30
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
45
uintptr_t desti = (uintptr_t)dest;
31
46
32
static bool fold_add(OptContext *ctx, TCGOp *op)
47
- /* Be careful not to clobber %o7 for a tail call. */
33
{
48
tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_REG_T1,
34
- return fold_const2(ctx, op);
49
- desti & ~0xfff, in_prologue,
35
+ if (fold_const2(ctx, op) ||
50
- tail_call ? TCG_REG_G2 : TCG_REG_O7);
36
+ fold_xi_to_x(ctx, op, 0)) {
51
+ desti & ~0xfff, in_prologue, TCG_REG_T2);
37
+ return true;
52
tcg_out_arithi(s, tail_call ? TCG_REG_G0 : TCG_REG_O7,
38
+ }
53
TCG_REG_T1, desti & 0xfff, JMPL);
39
+ return false;
40
}
54
}
41
55
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
42
static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
56
tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
43
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
57
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
44
{
58
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
45
if (fold_const2(ctx, op) ||
59
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_T3); /* for internal use */
46
fold_xi_to_i(ctx, op, 0) ||
47
+ fold_xi_to_x(ctx, op, -1) ||
48
fold_xx_to_x(ctx, op)) {
49
return true;
50
}
51
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
52
{
53
if (fold_const2(ctx, op) ||
54
fold_xx_to_i(ctx, op, 0) ||
55
+ fold_xi_to_x(ctx, op, 0) ||
56
fold_ix_to_not(ctx, op, -1)) {
57
return true;
58
}
59
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
60
static bool fold_eqv(OptContext *ctx, TCGOp *op)
61
{
62
if (fold_const2(ctx, op) ||
63
+ fold_xi_to_x(ctx, op, -1) ||
64
fold_xi_to_not(ctx, op, 0)) {
65
return true;
66
}
67
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
68
static bool fold_or(OptContext *ctx, TCGOp *op)
69
{
70
if (fold_const2(ctx, op) ||
71
+ fold_xi_to_x(ctx, op, 0) ||
72
fold_xx_to_x(ctx, op)) {
73
return true;
74
}
75
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
76
static bool fold_orc(OptContext *ctx, TCGOp *op)
77
{
78
if (fold_const2(ctx, op) ||
79
+ fold_xi_to_x(ctx, op, -1) ||
80
fold_ix_to_not(ctx, op, 0)) {
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
84
85
static bool fold_shift(OptContext *ctx, TCGOp *op)
86
{
87
- return fold_const2(ctx, op);
88
+ if (fold_const2(ctx, op) ||
89
+ fold_xi_to_x(ctx, op, 0)) {
90
+ return true;
91
+ }
92
+ return false;
93
}
60
}
94
61
95
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
62
#define ELF_HOST_MACHINE EM_SPARCV9
96
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
97
{
98
if (fold_const2(ctx, op) ||
99
fold_xx_to_i(ctx, op, 0) ||
100
+ fold_xi_to_x(ctx, op, 0) ||
101
fold_sub_to_neg(ctx, op)) {
102
return true;
103
}
104
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
105
{
106
if (fold_const2(ctx, op) ||
107
fold_xx_to_i(ctx, op, 0) ||
108
+ fold_xi_to_x(ctx, op, 0) ||
109
fold_xi_to_not(ctx, op, -1)) {
110
return true;
111
}
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
break;
114
}
115
116
- /* Simplify expression for "op r, a, const => mov r, a" cases */
117
- switch (opc) {
118
- CASE_OP_32_64_VEC(add):
119
- CASE_OP_32_64_VEC(sub):
120
- CASE_OP_32_64_VEC(or):
121
- CASE_OP_32_64_VEC(xor):
122
- CASE_OP_32_64_VEC(andc):
123
- CASE_OP_32_64(shl):
124
- CASE_OP_32_64(shr):
125
- CASE_OP_32_64(sar):
126
- CASE_OP_32_64(rotl):
127
- CASE_OP_32_64(rotr):
128
- if (!arg_is_const(op->args[1])
129
- && arg_is_const(op->args[2])
130
- && arg_info(op->args[2])->val == 0) {
131
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
132
- continue;
133
- }
134
- break;
135
- CASE_OP_32_64_VEC(and):
136
- CASE_OP_32_64_VEC(orc):
137
- CASE_OP_32_64(eqv):
138
- if (!arg_is_const(op->args[1])
139
- && arg_is_const(op->args[2])
140
- && arg_info(op->args[2])->val == -1) {
141
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
142
- continue;
143
- }
144
- break;
145
- default:
146
- break;
147
- }
148
-
149
/* Simplify using known-zero bits. Currently only ops with a single
150
output argument is supported. */
151
z_mask = -1;
152
--
63
--
153
2.25.1
64
2.34.1
154
155
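The identity constants that fold_xi_to_x() checks for are the usual algebraic
ones; a short self-checking sketch (illustrative only, not QEMU code) of the
"op r, a, i => mov r, a" cases gathered by this patch:

    /* Second-operand values for which each op degenerates to a move. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t a = 0x123456789abcdef0ull;

        assert((a + 0) == a);          /* add/sub/or/xor/andc: identity 0 */
        assert((a - 0) == a);
        assert((a | 0) == a);
        assert((a ^ 0) == a);
        assert((a & ~0ull) == a);      /* and/eqv/orc: identity is -1 */
        assert((a << 0) == a);         /* shifts and rotates: identity 0 */
        assert((a >> 0) == a);
        return 0;
    }
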
1
Adjust the interface to take the OptContext parameter instead
1
Emphasize that the constant is signed.
2
of TCGContext or both.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/optimize.c | 67 +++++++++++++++++++++++++-------------------------
6
tcg/sparc64/tcg-target.c.inc | 21 +++++++++++----------
9
1 file changed, 34 insertions(+), 33 deletions(-)
7
1 file changed, 11 insertions(+), 10 deletions(-)
10
8
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
11
--- a/tcg/sparc64/tcg-target.c.inc
14
+++ b/tcg/optimize.c
12
+++ b/tcg/sparc64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_sethi(TCGContext *s, TCGReg ret, uint32_t arg)
16
} TempOptInfo;
14
tcg_out32(s, SETHI | INSN_RD(ret) | ((arg & 0xfffffc00) >> 10));
17
18
typedef struct OptContext {
19
+ TCGContext *tcg;
20
TCGTempSet temps_used;
21
} OptContext;
22
23
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
24
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
25
}
15
}
26
16
27
-static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
17
-static void tcg_out_movi_imm13(TCGContext *s, TCGReg ret, int32_t arg)
28
+static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
18
+/* A 13-bit constant sign-extended to 64 bits. */
19
+static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
29
{
20
{
30
TCGTemp *dst_ts = arg_temp(dst);
21
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
31
TCGTemp *src_ts = arg_temp(src);
22
}
32
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
23
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
33
TCGOpcode new_op;
24
{
34
25
if (check_fit_i32(arg, 13)) {
35
if (ts_are_copies(dst_ts, src_ts)) {
26
/* A 13-bit constant sign-extended to 64-bits. */
36
- tcg_op_remove(s, op);
27
- tcg_out_movi_imm13(s, ret, arg);
37
+ tcg_op_remove(ctx->tcg, op);
28
+ tcg_out_movi_s13(s, ret, arg);
29
} else {
30
/* A 32-bit constant zero-extended to 64 bits. */
31
tcg_out_sethi(s, ret, arg);
32
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
33
34
/* A 13-bit constant sign-extended to 64-bits. */
35
if (check_fit_tl(arg, 13)) {
36
- tcg_out_movi_imm13(s, ret, arg);
37
+ tcg_out_movi_s13(s, ret, arg);
38
return;
38
return;
39
}
39
}
40
40
41
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
41
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGReg ret,
42
43
default:
44
tcg_out_cmp(s, c1, c2, c2const);
45
- tcg_out_movi_imm13(s, ret, 0);
46
+ tcg_out_movi_s13(s, ret, 0);
47
tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
48
return;
49
}
50
@@ -XXX,XX +XXX,XX @@ static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGReg ret,
51
/* For 64-bit signed comparisons vs zero, we can avoid the compare
52
if the input does not overlap the output. */
53
if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
54
- tcg_out_movi_imm13(s, ret, 0);
55
+ tcg_out_movi_s13(s, ret, 0);
56
tcg_out_movr(s, cond, ret, c1, 1, 1);
57
} else {
58
tcg_out_cmp(s, c1, c2, c2const);
59
- tcg_out_movi_imm13(s, ret, 0);
60
+ tcg_out_movi_s13(s, ret, 0);
61
tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
42
}
62
}
43
}
63
}
44
64
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
45
-static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
65
if (use_vis3_instructions && !is_sub) {
46
- TCGOp *op, TCGArg dst, uint64_t val)
66
/* Note that ADDXC doesn't accept immediates. */
47
+static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
67
if (bhconst && bh != 0) {
48
+ TCGArg dst, uint64_t val)
68
- tcg_out_movi_imm13(s, TCG_REG_T2, bh);
69
+ tcg_out_movi_s13(s, TCG_REG_T2, bh);
70
bh = TCG_REG_T2;
71
}
72
tcg_out_arith(s, rh, ah, bh, ARITH_ADDXC);
73
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2_i64(TCGContext *s, TCGReg rl, TCGReg rh,
74
* so the adjustment fits 12 bits.
75
*/
76
if (bhconst) {
77
- tcg_out_movi_imm13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1));
78
+ tcg_out_movi_s13(s, TCG_REG_T2, bh + (is_sub ? -1 : 1));
79
} else {
80
tcg_out_arithi(s, TCG_REG_T2, bh, 1,
81
is_sub ? ARITH_SUB : ARITH_ADD);
82
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
83
tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
84
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
85
/* delay slot */
86
- tcg_out_movi_imm13(s, TCG_REG_O0, 0);
87
+ tcg_out_movi_s13(s, TCG_REG_O0, 0);
88
89
build_trampolines(s);
90
}
91
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
49
{
92
{
50
const TCGOpDef *def = &tcg_op_defs[op->opc];
93
if (check_fit_ptr(a0, 13)) {
51
TCGType type;
94
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
52
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
95
- tcg_out_movi_imm13(s, TCG_REG_O0, a0);
53
/* Convert movi to mov with constant temp. */
96
+ tcg_out_movi_s13(s, TCG_REG_O0, a0);
54
tv = tcg_constant_internal(type, val);
97
return;
55
init_ts_info(ctx, tv);
98
} else {
56
- tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
99
intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0);
57
+ tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
58
}
59
60
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
{
63
int nb_temps, nb_globals, i;
64
TCGOp *op, *op_next, *prev_mb = NULL;
65
- OptContext ctx = {};
66
+ OptContext ctx = { .tcg = s };
67
68
/* Array VALS has an element for each temp.
69
If this temp holds a constant then its value is kept in VALS' element.
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
71
CASE_OP_32_64(rotr):
72
if (arg_is_const(op->args[1])
73
&& arg_info(op->args[1])->val == 0) {
74
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
76
continue;
77
}
78
break;
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
if (!arg_is_const(op->args[1])
81
&& arg_is_const(op->args[2])
82
&& arg_info(op->args[2])->val == 0) {
83
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
84
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
85
continue;
86
}
87
break;
88
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
89
if (!arg_is_const(op->args[1])
90
&& arg_is_const(op->args[2])
91
&& arg_info(op->args[2])->val == -1) {
92
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
93
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
94
continue;
95
}
96
break;
97
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
98
99
if (partmask == 0) {
100
tcg_debug_assert(nb_oargs == 1);
101
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
102
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
103
continue;
104
}
105
if (affected == 0) {
106
tcg_debug_assert(nb_oargs == 1);
107
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
108
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
109
continue;
110
}
111
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
CASE_OP_32_64(mulsh):
114
if (arg_is_const(op->args[2])
115
&& arg_info(op->args[2])->val == 0) {
116
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
117
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
118
continue;
119
}
120
break;
121
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
122
CASE_OP_32_64_VEC(or):
123
CASE_OP_32_64_VEC(and):
124
if (args_are_copies(op->args[1], op->args[2])) {
125
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
126
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
127
continue;
128
}
129
break;
130
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
131
CASE_OP_32_64_VEC(sub):
132
CASE_OP_32_64_VEC(xor):
133
if (args_are_copies(op->args[1], op->args[2])) {
134
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
135
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
136
continue;
137
}
138
break;
139
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
140
allocator where needed and possible. Also detect copies. */
141
switch (opc) {
142
CASE_OP_32_64_VEC(mov):
143
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
144
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
145
continue;
146
147
case INDEX_op_dup_vec:
148
if (arg_is_const(op->args[1])) {
149
tmp = arg_info(op->args[1])->val;
150
tmp = dup_const(TCGOP_VECE(op), tmp);
151
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
152
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
153
continue;
154
}
155
break;
156
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
157
case INDEX_op_dup2_vec:
158
assert(TCG_TARGET_REG_BITS == 32);
159
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
160
- tcg_opt_gen_movi(s, &ctx, op, op->args[0],
161
+ tcg_opt_gen_movi(&ctx, op, op->args[0],
162
deposit64(arg_info(op->args[1])->val, 32, 32,
163
arg_info(op->args[2])->val));
164
continue;
165
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
166
case INDEX_op_extrh_i64_i32:
167
if (arg_is_const(op->args[1])) {
168
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
169
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
171
continue;
172
}
173
break;
174
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
175
if (arg_is_const(op->args[1])) {
176
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
177
op->args[2]);
178
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
180
continue;
181
}
182
break;
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
185
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
186
arg_info(op->args[2])->val);
187
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
189
continue;
190
}
191
break;
192
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
193
TCGArg v = arg_info(op->args[1])->val;
194
if (v != 0) {
195
tmp = do_constant_folding(opc, v, 0);
196
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
198
} else {
199
- tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
200
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
201
}
202
continue;
203
}
204
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
205
tmp = deposit64(arg_info(op->args[1])->val,
206
op->args[3], op->args[4],
207
arg_info(op->args[2])->val);
208
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
209
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
210
continue;
211
}
212
break;
213
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
214
if (arg_is_const(op->args[1])) {
215
tmp = extract64(arg_info(op->args[1])->val,
216
op->args[2], op->args[3]);
217
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
218
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
219
continue;
220
}
221
break;
222
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
223
if (arg_is_const(op->args[1])) {
224
tmp = sextract64(arg_info(op->args[1])->val,
225
op->args[2], op->args[3]);
226
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
227
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
228
continue;
229
}
230
break;
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
tmp = (int32_t)(((uint32_t)v1 >> shr) |
233
((uint32_t)v2 << (32 - shr)));
234
}
235
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
236
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
237
continue;
238
}
239
break;
240
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
241
tmp = do_constant_folding_cond(opc, op->args[1],
242
op->args[2], op->args[3]);
243
if (tmp != 2) {
244
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
245
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
246
continue;
247
}
248
break;
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
250
tmp = do_constant_folding_cond(opc, op->args[1],
251
op->args[2], op->args[5]);
252
if (tmp != 2) {
253
- tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
254
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
255
continue;
256
}
257
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
258
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
259
260
rl = op->args[0];
261
rh = op->args[1];
262
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
263
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
264
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
265
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
266
continue;
267
}
268
break;
269
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
270
271
rl = op->args[0];
272
rh = op->args[1];
273
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
274
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
275
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
276
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
277
continue;
278
}
279
break;
280
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
281
op->args[5]);
282
if (tmp != 2) {
283
do_setcond_const:
284
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
285
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
286
continue;
287
}
288
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
289
--
100
--
290
2.25.1
101
2.34.1
291
292
1
Copy z_mask into OptContext, for writeback to the
1
Shuffle the order in tcg_out_movi_int to check s13 first, and
2
first output within the new function.
2
drop this check from tcg_out_movi_imm32. This might make the
3
sequence for in_prologue larger, but that is not worth worrying about.
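
As a concrete illustration of the reordering (a standalone sketch under the usual SPARC encoding rules, not the backend code itself; the helper names are made up): a constant fits the 13-bit signed immediate exactly when it lies in [-4096, 4095], so testing that range first lets the cheapest one-instruction form win before falling back to sethi+or.

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical fit test for a 13-bit signed immediate. */
    static bool fits_simm13(int64_t val)
    {
        return val >= -4096 && val <= 4095;
    }

    /* Sketch of the decision order: cheapest encoding first. */
    static const char *movi_form(int64_t val)
    {
        if (fits_simm13(val)) {
            return "or %g0, simm13, rd";          /* one insn */
        }
        if (val == (uint32_t)val) {
            return "sethi %hi(val); or low bits"; /* 32-bit, zero-extended */
        }
        return "longer multi-insn sequence";
    }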
3
4
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 49 +++++++++++++++++++++++++++++++++----------------
8
tcg/sparc64/tcg-target.c.inc | 25 ++++++++++---------------
9
1 file changed, 33 insertions(+), 16 deletions(-)
9
1 file changed, 10 insertions(+), 15 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/sparc64/tcg-target.c.inc
14
+++ b/tcg/optimize.c
14
+++ b/tcg/sparc64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
16
TCGContext *tcg;
16
17
TCGOp *prev_mb;
17
static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
18
TCGTempSet temps_used;
18
{
19
+
19
- if (check_fit_i32(arg, 13)) {
20
+ /* In flight values from optimization. */
20
- /* A 13-bit constant sign-extended to 64-bits. */
21
+ uint64_t z_mask;
21
- tcg_out_movi_s13(s, ret, arg);
22
} OptContext;
22
- } else {
23
23
- /* A 32-bit constant zero-extended to 64 bits. */
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
24
- tcg_out_sethi(s, ret, arg);
25
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
25
- if (arg & 0x3ff) {
26
- tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
27
- }
28
+ /* A 32-bit constant zero-extended to 64 bits. */
29
+ tcg_out_sethi(s, ret, arg);
30
+ if (arg & 0x3ff) {
31
+ tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
26
}
32
}
27
}
33
}
28
34
29
+static void finish_folding(OptContext *ctx, TCGOp *op)
35
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
30
+{
36
tcg_target_long hi, lo = (int32_t)arg;
31
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
37
tcg_target_long test, lsb;
32
+ int i, nb_oargs;
38
33
+
39
- /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
34
+ /*
40
- if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
35
+ * For an opcode that ends a BB, reset all temp data.
41
- tcg_out_movi_imm32(s, ret, arg);
36
+ * We do no cross-BB optimization.
42
- return;
37
+ */
43
- }
38
+ if (def->flags & TCG_OPF_BB_END) {
44
-
39
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
45
/* A 13-bit constant sign-extended to 64-bits. */
40
+ ctx->prev_mb = NULL;
46
if (check_fit_tl(arg, 13)) {
47
tcg_out_movi_s13(s, ret, arg);
48
return;
49
}
50
51
+ /* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
52
+ if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
53
+ tcg_out_movi_imm32(s, ret, arg);
41
+ return;
54
+ return;
42
+ }
55
+ }
43
+
56
+
44
+ nb_oargs = def->nb_oargs;
57
/* A 13-bit constant relative to the TB. */
45
+ for (i = 0; i < nb_oargs; i++) {
58
if (!in_prologue) {
46
+ reset_temp(op->args[i]);
59
test = tcg_tbrel_diff(s, (void *)arg);
47
+ /*
48
+ * Save the corresponding known-zero bits mask for the
49
+ * first output argument (only one supported so far).
50
+ */
51
+ if (i == 0) {
52
+ arg_info(op->args[i])->z_mask = ctx->z_mask;
53
+ }
54
+ }
55
+}
56
+
57
static bool fold_call(OptContext *ctx, TCGOp *op)
58
{
59
TCGContext *s = ctx->tcg;
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
partmask &= 0xffffffffu;
62
affected &= 0xffffffffu;
63
}
64
+ ctx.z_mask = z_mask;
65
66
if (partmask == 0) {
67
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
break;
70
}
71
72
- /* Some of the folding above can change opc. */
73
- opc = op->opc;
74
- def = &tcg_op_defs[opc];
75
- if (def->flags & TCG_OPF_BB_END) {
76
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
77
- } else {
78
- int nb_oargs = def->nb_oargs;
79
- for (i = 0; i < nb_oargs; i++) {
80
- reset_temp(op->args[i]);
81
- /* Save the corresponding known-zero bits mask for the
82
- first output argument (only one supported so far). */
83
- if (i == 0) {
84
- arg_info(op->args[i])->z_mask = z_mask;
85
- }
86
- }
87
- }
88
+ finish_folding(&ctx, op);
89
90
/* Eliminate duplicate and redundant fence instructions. */
91
if (ctx.prev_mb) {
92
--
60
--
93
2.25.1
61
2.34.1
94
95
diff view generated by jsdifflib
1
Pull the "op r, 0, b => movi r, 0" optimization into a function,
1
Emphasize that the constant is unsigned.
2
and use it in fold_shift.
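
For readers skimming the optimize.c change, the rule being pulled out is just the algebraic identity that shifting or rotating a constant zero yields zero. A hedged sketch of that folding rule (illustrative names, not the tcg/optimize.c API):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * "op r, 0, b => movi r, 0": when the shifted operand is known to be
     * the constant 0, the shift count is irrelevant and the whole op can
     * be replaced by loading 0.
     */
    static bool try_fold_shift_of_zero(bool arg1_is_const, uint64_t arg1_val,
                                       uint64_t *folded)
    {
        if (arg1_is_const && arg1_val == 0) {
            *folded = 0;
            return true;    /* replace the op with movi 0 */
        }
        return false;       /* keep the original shift */
    }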
3
2
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
5
---
8
tcg/optimize.c | 28 ++++++++++------------------
6
tcg/sparc64/tcg-target.c.inc | 12 ++++++------
9
1 file changed, 10 insertions(+), 18 deletions(-)
7
1 file changed, 6 insertions(+), 6 deletions(-)
10
8
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
11
--- a/tcg/sparc64/tcg-target.c.inc
14
+++ b/tcg/optimize.c
12
+++ b/tcg/sparc64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
16
return false;
14
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
17
}
15
}
18
16
19
+/* If the binary operation has first argument @i, fold to @i. */
17
-static void tcg_out_movi_imm32(TCGContext *s, TCGReg ret, int32_t arg)
20
+static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
18
+/* A 32-bit constant zero-extended to 64 bits. */
21
+{
19
+static void tcg_out_movi_u32(TCGContext *s, TCGReg ret, uint32_t arg)
22
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
+ }
25
+ return false;
26
+}
27
+
28
/* If the binary operation has first argument @i, fold to NOT. */
29
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
30
{
20
{
31
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
21
- /* A 32-bit constant zero-extended to 64 bits. */
32
static bool fold_shift(OptContext *ctx, TCGOp *op)
22
tcg_out_sethi(s, ret, arg);
33
{
23
if (arg & 0x3ff) {
34
if (fold_const2(ctx, op) ||
24
tcg_out_arithi(s, ret, ret, arg & 0x3ff, ARITH_OR);
35
+ fold_ix_to_i(ctx, op, 0) ||
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
36
fold_xi_to_x(ctx, op, 0)) {
26
37
return true;
27
/* A 32-bit constant, or 32-bit zero-extended to 64-bits. */
28
if (type == TCG_TYPE_I32 || arg == (uint32_t)arg) {
29
- tcg_out_movi_imm32(s, ret, arg);
30
+ tcg_out_movi_u32(s, ret, arg);
31
return;
38
}
32
}
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
33
40
break;
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
41
}
35
/* A 64-bit constant decomposed into 2 32-bit pieces. */
42
36
if (check_fit_i32(lo, 13)) {
43
- /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
37
hi = (arg - lo) >> 32;
44
- and "sub r, 0, a => neg r, a" case. */
38
- tcg_out_movi_imm32(s, ret, hi);
45
- switch (opc) {
39
+ tcg_out_movi_u32(s, ret, hi);
46
- CASE_OP_32_64(shl):
40
tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
47
- CASE_OP_32_64(shr):
41
tcg_out_arithi(s, ret, ret, lo, ARITH_ADD);
48
- CASE_OP_32_64(sar):
42
} else {
49
- CASE_OP_32_64(rotl):
43
hi = arg >> 32;
50
- CASE_OP_32_64(rotr):
44
- tcg_out_movi_imm32(s, ret, hi);
51
- if (arg_is_const(op->args[1])
45
- tcg_out_movi_imm32(s, scratch, lo);
52
- && arg_info(op->args[1])->val == 0) {
46
+ tcg_out_movi_u32(s, ret, hi);
53
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
47
+ tcg_out_movi_u32(s, scratch, lo);
54
- continue;
48
tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
55
- }
49
tcg_out_arith(s, ret, ret, scratch, ARITH_OR);
56
- break;
50
}
57
- default:
58
- break;
59
- }
60
-
61
/* Simplify using known-zero bits. Currently only ops with a single
62
output argument is supported. */
63
z_mask = -1;
64
--
51
--
65
2.25.1
52
2.34.1
66
67
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
3
---
5
tcg/optimize.c | 27 ++++++++++++++++-----------
4
tcg/sparc64/tcg-target.c.inc | 10 ++++++++--
6
1 file changed, 16 insertions(+), 11 deletions(-)
5
1 file changed, 8 insertions(+), 2 deletions(-)
7
6
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
7
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
9
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
9
--- a/tcg/sparc64/tcg-target.c.inc
11
+++ b/tcg/optimize.c
10
+++ b/tcg/sparc64/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
11
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_s13(TCGContext *s, TCGReg ret, int32_t arg)
13
return false;
12
tcg_out_arithi(s, ret, TCG_REG_G0, arg, ARITH_OR);
14
}
13
}
15
14
16
+static bool fold_bswap(OptContext *ctx, TCGOp *op)
15
+/* A 32-bit constant sign-extended to 64 bits. */
16
+static void tcg_out_movi_s32(TCGContext *s, TCGReg ret, int32_t arg)
17
+{
17
+{
18
+ if (arg_is_const(op->args[1])) {
18
+ tcg_out_sethi(s, ret, ~arg);
19
+ uint64_t t = arg_info(op->args[1])->val;
19
+ tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
20
+
21
+ t = do_constant_folding(op->opc, t, op->args[2]);
22
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
23
+ }
24
+ return false;
25
+}
20
+}
26
+
21
+
27
static bool fold_call(OptContext *ctx, TCGOp *op)
22
/* A 32-bit constant zero-extended to 64 bits. */
23
static void tcg_out_movi_u32(TCGContext *s, TCGReg ret, uint32_t arg)
28
{
24
{
29
TCGContext *s = ctx->tcg;
25
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
26
31
}
27
/* A 32-bit constant sign-extended to 64-bits. */
32
break;
28
if (arg == lo) {
33
29
- tcg_out_sethi(s, ret, ~arg);
34
- CASE_OP_32_64(bswap16):
30
- tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
35
- CASE_OP_32_64(bswap32):
31
+ tcg_out_movi_s32(s, ret, arg);
36
- case INDEX_op_bswap64_i64:
32
return;
37
- if (arg_is_const(op->args[1])) {
33
}
38
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
34
39
- op->args[2]);
40
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
41
- continue;
42
- }
43
- break;
44
-
45
default:
46
break;
47
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
case INDEX_op_brcond2_i32:
50
done = fold_brcond2(&ctx, op);
51
break;
52
+ CASE_OP_32_64(bswap16):
53
+ CASE_OP_32_64(bswap32):
54
+ case INDEX_op_bswap64_i64:
55
+ done = fold_bswap(&ctx, op);
56
+ break;
57
CASE_OP_32_64(clz):
58
CASE_OP_32_64(ctz):
59
done = fold_count_zeros(&ctx, op);
60
--
35
--
61
2.25.1
36
2.34.1
62
63
1
From: Luis Pires <luis.pires@eldorado.org.br>
1
Drop the target-specific trampolines for the standard slow path.
2
This lets us use tcg_out_helper_{ld,st}_args, and handles the new
3
atomicity bits within MemOp.
2
4
3
In preparation for changing the divu128/divs128 implementations
5
At the same time, use the full load/store helpers for user-only mode.
4
to allow for quotients larger than 64 bits, move the div-by-zero
6
Drop inline unaligned access support for user-only mode, as it does
5
and overflow checks to the callers.
7
not handle atomicity.
6
8
7
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
9
Use TCG_REG_T[1-3] in the tlb lookup, instead of TCG_REG_O[0-2].
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
This allows the constraints to be simplified.
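
To make the divu128/divs128 part of this pair concrete: with the checks moved out of the helpers, each caller decides up front whether the 128/64 division even produces a representable quotient. A minimal sketch of the unsigned condition (illustrative, not the QEMU helper itself, though it matches the int_helper.c hunk later in this patch): the quotient exceeds 64 bits exactly when the divisor is zero or the high half of the dividend is not smaller than the divisor.

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * dividend = hi * 2^64 + lo.  The quotient fits in 64 bits iff
     * dividend < divisor * 2^64, i.e. iff hi < divisor.
     */
    static bool divu128_would_overflow(uint64_t hi, uint64_t divisor)
    {
        return divisor == 0 || hi >= divisor;
    }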
9
Message-Id: <20211025191154.350831-2-luis.pires@eldorado.org.br>
11
12
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
14
---
12
include/hw/clock.h | 5 +++--
15
tcg/sparc64/tcg-target-con-set.h | 2 -
13
include/qemu/host-utils.h | 34 ++++++++++++---------------------
16
tcg/sparc64/tcg-target-con-str.h | 1 -
14
target/ppc/int_helper.c | 14 +++++++++-----
17
tcg/sparc64/tcg-target.h | 1 +
15
util/host-utils.c | 40 ++++++++++++++++++---------------------
18
tcg/sparc64/tcg-target.c.inc | 610 +++++++++----------------------
16
4 files changed, 42 insertions(+), 51 deletions(-)
19
4 files changed, 182 insertions(+), 432 deletions(-)
17
20
18
diff --git a/include/hw/clock.h b/include/hw/clock.h
21
diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h
19
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/clock.h
23
--- a/tcg/sparc64/tcg-target-con-set.h
21
+++ b/include/hw/clock.h
24
+++ b/tcg/sparc64/tcg-target-con-set.h
22
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
25
@@ -XXX,XX +XXX,XX @@
23
return 0;
26
C_O0_I1(r)
24
}
27
C_O0_I2(rZ, r)
25
/*
28
C_O0_I2(rZ, rJ)
26
- * Ignore divu128() return value as we've caught div-by-zero and don't
29
-C_O0_I2(sZ, s)
27
- * need different behaviour for overflow.
30
-C_O1_I1(r, s)
28
+ * BUG: when CONFIG_INT128 is not defined, the current implementation of
31
C_O1_I1(r, r)
29
+ * divu128 does not return a valid truncated quotient, so the result will
32
C_O1_I2(r, r, r)
30
+ * be wrong.
33
C_O1_I2(r, rZ, rJ)
31
*/
34
diff --git a/tcg/sparc64/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h
32
divu128(&lo, &hi, clk->period);
33
return lo;
34
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
35
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
36
--- a/include/qemu/host-utils.h
36
--- a/tcg/sparc64/tcg-target-con-str.h
37
+++ b/include/qemu/host-utils.h
37
+++ b/tcg/sparc64/tcg-target-con-str.h
38
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
38
@@ -XXX,XX +XXX,XX @@
39
return (__int128_t)a * b / c;
39
* REGS(letter, register_mask)
40
*/
41
REGS('r', ALL_GENERAL_REGS)
42
-REGS('s', ALL_QLDST_REGS)
43
44
/*
45
* Define constraint letters for constants:
46
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
47
index XXXXXXX..XXXXXXX 100644
48
--- a/tcg/sparc64/tcg-target.h
49
+++ b/tcg/sparc64/tcg-target.h
50
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
51
52
#define TCG_TARGET_DEFAULT_MO (0)
53
#define TCG_TARGET_HAS_MEMORY_BSWAP 1
54
+#define TCG_TARGET_NEED_LDST_LABELS
55
#define TCG_TARGET_NEED_POOL_LABELS
56
57
#endif
58
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
59
index XXXXXXX..XXXXXXX 100644
60
--- a/tcg/sparc64/tcg-target.c.inc
61
+++ b/tcg/sparc64/tcg-target.c.inc
62
@@ -XXX,XX +XXX,XX @@
63
#error "unsupported code generation mode"
64
#endif
65
66
+#include "../tcg-ldst.c.inc"
67
#include "../tcg-pool.c.inc"
68
69
#ifdef CONFIG_DEBUG_TCG
70
@@ -XXX,XX +XXX,XX @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
71
#define TCG_CT_CONST_S13 0x200
72
#define TCG_CT_CONST_ZERO 0x400
73
74
-/*
75
- * For softmmu, we need to avoid conflicts with the first 3
76
- * argument registers to perform the tlb lookup, and to call
77
- * the helper function.
78
- */
79
-#ifdef CONFIG_SOFTMMU
80
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_O0, 3)
81
-#else
82
-#define SOFTMMU_RESERVE_REGS 0
83
-#endif
84
-#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
85
-#define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
86
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
87
88
/* Define some temporary registers. T3 is used for constant generation. */
89
#define TCG_REG_T1 TCG_REG_G1
90
@@ -XXX,XX +XXX,XX @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
91
tcg_out32(s, MEMBAR | (a0 & TCG_MO_ALL));
40
}
92
}
41
93
42
-static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
94
-#ifdef CONFIG_SOFTMMU
43
+static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
95
-static const tcg_insn_unit *qemu_ld_trampoline[MO_SSIZE + 1];
96
-static const tcg_insn_unit *qemu_st_trampoline[MO_SIZE + 1];
97
-
98
-static void build_trampolines(TCGContext *s)
99
-{
100
- int i;
101
-
102
- for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) {
103
- if (qemu_ld_helpers[i] == NULL) {
104
- continue;
105
- }
106
-
107
- /* May as well align the trampoline. */
108
- while ((uintptr_t)s->code_ptr & 15) {
109
- tcg_out_nop(s);
110
- }
111
- qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
112
-
113
- /* Set the retaddr operand. */
114
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7);
115
- /* Tail call. */
116
- tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true);
117
- /* delay slot -- set the env argument */
118
- tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
119
- }
120
-
121
- for (i = 0; i < ARRAY_SIZE(qemu_st_helpers); ++i) {
122
- if (qemu_st_helpers[i] == NULL) {
123
- continue;
124
- }
125
-
126
- /* May as well align the trampoline. */
127
- while ((uintptr_t)s->code_ptr & 15) {
128
- tcg_out_nop(s);
129
- }
130
- qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr);
131
-
132
- /* Set the retaddr operand. */
133
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7);
134
-
135
- /* Tail call. */
136
- tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true);
137
- /* delay slot -- set the env argument */
138
- tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
139
- }
140
-}
141
-#else
142
-static const tcg_insn_unit *qemu_unalign_ld_trampoline;
143
-static const tcg_insn_unit *qemu_unalign_st_trampoline;
144
-
145
-static void build_trampolines(TCGContext *s)
146
-{
147
- for (int ld = 0; ld < 2; ++ld) {
148
- void *helper;
149
-
150
- while ((uintptr_t)s->code_ptr & 15) {
151
- tcg_out_nop(s);
152
- }
153
-
154
- if (ld) {
155
- helper = helper_unaligned_ld;
156
- qemu_unalign_ld_trampoline = tcg_splitwx_to_rx(s->code_ptr);
157
- } else {
158
- helper = helper_unaligned_st;
159
- qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr);
160
- }
161
-
162
- /* Tail call. */
163
- tcg_out_jmpl_const(s, helper, true, true);
164
- /* delay slot -- set the env argument */
165
- tcg_out_mov_delay(s, TCG_REG_O0, TCG_AREG0);
166
- }
167
-}
168
-#endif
169
-
170
/* Generate global QEMU prologue and epilogue code */
171
static void tcg_target_qemu_prologue(TCGContext *s)
44
{
172
{
45
- if (divisor == 0) {
173
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
46
- return 1;
174
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN);
47
- } else {
175
/* delay slot */
48
- __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
176
tcg_out_movi_s13(s, TCG_REG_O0, 0);
49
- __uint128_t result = dividend / divisor;
177
-
50
- *plow = result;
178
- build_trampolines(s);
51
- *phigh = dividend % divisor;
52
- return result > UINT64_MAX;
53
- }
54
+ __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
55
+ __uint128_t result = dividend / divisor;
56
+ *plow = result;
57
+ *phigh = dividend % divisor;
58
}
179
}
59
180
60
-static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
181
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
61
+static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
182
@@ -XXX,XX +XXX,XX @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
62
{
63
- if (divisor == 0) {
64
- return 1;
65
- } else {
66
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
67
- __int128_t result = dividend / divisor;
68
- *plow = result;
69
- *phigh = dividend % divisor;
70
- return result != *plow;
71
- }
72
+ __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
73
+ __int128_t result = dividend / divisor;
74
+ *plow = result;
75
+ *phigh = dividend % divisor;
76
}
77
#else
78
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
79
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
80
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
81
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
82
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
83
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
84
85
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
86
{
87
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/ppc/int_helper.c
90
+++ b/target/ppc/int_helper.c
91
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
92
uint64_t rt = 0;
93
int overflow = 0;
94
95
- overflow = divu128(&rt, &ra, rb);
96
-
97
- if (unlikely(overflow)) {
98
+ if (unlikely(rb == 0 || ra >= rb)) {
99
+ overflow = 1;
100
rt = 0; /* Undefined */
101
+ } else {
102
+ divu128(&rt, &ra, rb);
103
}
104
105
if (oe) {
106
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
107
int64_t rt = 0;
108
int64_t ra = (int64_t)rau;
109
int64_t rb = (int64_t)rbu;
110
- int overflow = divs128(&rt, &ra, rb);
111
+ int overflow = 0;
112
113
- if (unlikely(overflow)) {
114
+ if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
115
+ overflow = 1;
116
rt = 0; /* Undefined */
117
+ } else {
118
+ divs128(&rt, &ra, rb);
119
}
120
121
if (oe) {
122
diff --git a/util/host-utils.c b/util/host-utils.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/util/host-utils.c
125
+++ b/util/host-utils.c
126
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
127
*phigh = rh;
128
}
129
130
-/* Unsigned 128x64 division. Returns 1 if overflow (divide by zero or */
131
-/* quotient exceeds 64 bits). Otherwise returns quotient via plow and */
132
-/* remainder via phigh. */
133
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
134
+/*
135
+ * Unsigned 128-by-64 division. Returns quotient via plow and
136
+ * remainder via phigh.
137
+ * The result must fit in 64 bits (plow) - otherwise, the result
138
+ * is undefined.
139
+ * This function will cause a division by zero if passed a zero divisor.
140
+ */
141
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
142
{
143
uint64_t dhi = *phigh;
144
uint64_t dlo = *plow;
145
unsigned i;
146
uint64_t carry = 0;
147
148
- if (divisor == 0) {
149
- return 1;
150
- } else if (dhi == 0) {
151
+ if (divisor == 0 || dhi == 0) {
152
*plow = dlo / divisor;
153
*phigh = dlo % divisor;
154
- return 0;
155
- } else if (dhi >= divisor) {
156
- return 1;
157
} else {
158
159
for (i = 0; i < 64; i++) {
160
@@ -XXX,XX +XXX,XX @@ int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
161
162
*plow = dlo;
163
*phigh = dhi;
164
- return 0;
165
}
183
}
166
}
184
}
167
185
168
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
186
-#if defined(CONFIG_SOFTMMU)
187
+static const TCGLdstHelperParam ldst_helper_param = {
188
+ .ntmp = 1, .tmp = { TCG_REG_T1 }
189
+};
190
191
-/* We expect to use a 13-bit negative offset from ENV. */
192
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
193
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
194
-
195
-/* Perform the TLB load and compare.
196
-
197
- Inputs:
198
- ADDRLO and ADDRHI contain the possible two parts of the address.
199
-
200
- MEM_INDEX and S_BITS are the memory context and log2 size of the load.
201
-
202
- WHICH is the offset into the CPUTLBEntry structure of the slot to read.
203
- This should be offsetof addr_read or addr_write.
204
-
205
- The result of the TLB comparison is in %[ix]cc. The sanitized address
206
- is in the returned register, maybe %o0. The TLB addend is in %o1. */
207
-
208
-static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index,
209
- MemOp opc, int which)
210
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
211
{
212
+ MemOp opc = get_memop(lb->oi);
213
+ MemOp sgn;
214
+
215
+ if (!patch_reloc(lb->label_ptr[0], R_SPARC_WDISP19,
216
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 0)) {
217
+ return false;
218
+ }
219
+
220
+ /* Use inline tcg_out_ext32s; otherwise let the helper sign-extend. */
221
+ sgn = (opc & MO_SIZE) < MO_32 ? MO_SIGN : 0;
222
+
223
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
224
+ tcg_out_call(s, qemu_ld_helpers[opc & (MO_SIZE | sgn)], NULL);
225
+ tcg_out_ld_helper_ret(s, lb, sgn, &ldst_helper_param);
226
+
227
+ tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
228
+ return patch_reloc(s->code_ptr - 1, R_SPARC_WDISP19,
229
+ (intptr_t)lb->raddr, 0);
230
+}
231
+
232
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
233
+{
234
+ MemOp opc = get_memop(lb->oi);
235
+
236
+ if (!patch_reloc(lb->label_ptr[0], R_SPARC_WDISP19,
237
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 0)) {
238
+ return false;
239
+ }
240
+
241
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
242
+ tcg_out_call(s, qemu_st_helpers[opc & MO_SIZE], NULL);
243
+
244
+ tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
245
+ return patch_reloc(s->code_ptr - 1, R_SPARC_WDISP19,
246
+ (intptr_t)lb->raddr, 0);
247
+}
248
+
249
+typedef struct {
250
+ TCGReg base;
251
+ TCGReg index;
252
+} HostAddress;
253
+
169
+/*
254
+/*
170
+ * Signed 128-by-64 division. Returns quotient via plow and
255
+ * For softmmu, perform the TLB load and compare.
171
+ * remainder via phigh.
256
+ * For useronly, perform any required alignment tests.
172
+ * The result must fit in 64 bits (plow) - otherwise, the result
257
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
173
+ * is undefined.
258
+ * is required and fill in @h with the host address for the fast path.
174
+ * This function will cause a division by zero if passed a zero divisor.
175
+ */
259
+ */
176
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
260
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
261
+ TCGReg addr_reg, MemOpIdx oi,
262
+ bool is_ld)
263
+{
264
+ TCGLabelQemuLdst *ldst = NULL;
265
+ MemOp opc = get_memop(oi);
266
+ unsigned a_bits = get_alignment_bits(opc);
267
+ unsigned s_bits = opc & MO_SIZE;
268
+ unsigned a_mask;
269
+
270
+ /* We don't support unaligned accesses. */
271
+ a_bits = MAX(a_bits, s_bits);
272
+ a_mask = (1u << a_bits) - 1;
273
+
274
+#ifdef CONFIG_SOFTMMU
275
+ int mem_index = get_mmuidx(oi);
276
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
277
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
278
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
279
- const TCGReg r0 = TCG_REG_O0;
280
- const TCGReg r1 = TCG_REG_O1;
281
- const TCGReg r2 = TCG_REG_O2;
282
- unsigned s_bits = opc & MO_SIZE;
283
- unsigned a_bits = get_alignment_bits(opc);
284
- tcg_target_long compare_mask;
285
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
286
+ : offsetof(CPUTLBEntry, addr_write);
287
+ int add_off = offsetof(CPUTLBEntry, addend);
288
+ int compare_mask;
289
+ int cc;
290
291
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
292
- tcg_out_ld(s, TCG_TYPE_PTR, r0, TCG_AREG0, mask_off);
293
- tcg_out_ld(s, TCG_TYPE_PTR, r1, TCG_AREG0, table_off);
294
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
295
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 12));
296
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T2, TCG_AREG0, mask_off);
297
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T3, TCG_AREG0, table_off);
298
299
/* Extract the page index, shifted into place for tlb index. */
300
- tcg_out_arithi(s, r2, addr, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
301
- SHIFT_SRL);
302
- tcg_out_arith(s, r2, r2, r0, ARITH_AND);
303
+ tcg_out_arithi(s, TCG_REG_T1, addr_reg,
304
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
305
+ tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T2, ARITH_AND);
306
307
/* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */
308
- tcg_out_arith(s, r2, r2, r1, ARITH_ADD);
309
+ tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T3, ARITH_ADD);
310
311
- /* Load the tlb comparator and the addend. */
312
- tcg_out_ld(s, TCG_TYPE_TL, r0, r2, which);
313
- tcg_out_ld(s, TCG_TYPE_PTR, r1, r2, offsetof(CPUTLBEntry, addend));
314
+ /* Load the tlb comparator and the addend. */
315
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_T2, TCG_REG_T1, cmp_off);
316
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_T1, TCG_REG_T1, add_off);
317
+ h->base = TCG_REG_T1;
318
319
- /* Mask out the page offset, except for the required alignment.
320
- We don't support unaligned accesses. */
321
- if (a_bits < s_bits) {
322
- a_bits = s_bits;
323
- }
324
- compare_mask = (tcg_target_ulong)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
325
+ /* Mask out the page offset, except for the required alignment. */
326
+ compare_mask = TARGET_PAGE_MASK | a_mask;
327
if (check_fit_tl(compare_mask, 13)) {
328
- tcg_out_arithi(s, r2, addr, compare_mask, ARITH_AND);
329
+ tcg_out_arithi(s, TCG_REG_T3, addr_reg, compare_mask, ARITH_AND);
330
} else {
331
- tcg_out_movi(s, TCG_TYPE_TL, r2, compare_mask);
332
- tcg_out_arith(s, r2, addr, r2, ARITH_AND);
333
+ tcg_out_movi_s32(s, TCG_REG_T3, compare_mask);
334
+ tcg_out_arith(s, TCG_REG_T3, addr_reg, TCG_REG_T3, ARITH_AND);
335
}
336
- tcg_out_cmp(s, r0, r2, 0);
337
+ tcg_out_cmp(s, TCG_REG_T2, TCG_REG_T3, 0);
338
339
- /* If the guest address must be zero-extended, do so now. */
340
+ ldst = new_ldst_label(s);
341
+ ldst->is_ld = is_ld;
342
+ ldst->oi = oi;
343
+ ldst->addrlo_reg = addr_reg;
344
+ ldst->label_ptr[0] = s->code_ptr;
345
+
346
+ /* bne,pn %[xi]cc, label0 */
347
+ cc = TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC;
348
+ tcg_out_bpcc0(s, COND_NE, BPCC_PN | cc, 0);
349
+#else
350
+ if (a_bits != s_bits) {
351
+ /*
352
+ * Test for at least natural alignment, and defer
353
+ * everything else to the helper functions.
354
+ */
355
+ tcg_debug_assert(check_fit_tl(a_mask, 13));
356
+ tcg_out_arithi(s, TCG_REG_G0, addr_reg, a_mask, ARITH_ANDCC);
357
+
358
+ ldst = new_ldst_label(s);
359
+ ldst->is_ld = is_ld;
360
+ ldst->oi = oi;
361
+ ldst->addrlo_reg = addr_reg;
362
+ ldst->label_ptr[0] = s->code_ptr;
363
+
364
+ /* bne,pn %icc, label0 */
365
+ tcg_out_bpcc0(s, COND_NE, BPCC_PN | BPCC_ICC, 0);
366
+ }
367
+ h->base = guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0;
368
+#endif
369
+
370
+ /* If the guest address must be zero-extended, do in the delay slot. */
371
if (TARGET_LONG_BITS == 32) {
372
- tcg_out_ext32u(s, r0, addr);
373
- return r0;
374
+ tcg_out_ext32u(s, TCG_REG_T2, addr_reg);
375
+ h->index = TCG_REG_T2;
376
+ } else {
377
+ if (ldst) {
378
+ tcg_out_nop(s);
379
+ }
380
+ h->index = addr_reg;
381
}
382
- return addr;
383
+ return ldst;
384
}
385
-#endif /* CONFIG_SOFTMMU */
386
-
387
-static const int qemu_ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
388
- [MO_UB] = LDUB,
389
- [MO_SB] = LDSB,
390
- [MO_UB | MO_LE] = LDUB,
391
- [MO_SB | MO_LE] = LDSB,
392
-
393
- [MO_BEUW] = LDUH,
394
- [MO_BESW] = LDSH,
395
- [MO_BEUL] = LDUW,
396
- [MO_BESL] = LDSW,
397
- [MO_BEUQ] = LDX,
398
- [MO_BESQ] = LDX,
399
-
400
- [MO_LEUW] = LDUH_LE,
401
- [MO_LESW] = LDSH_LE,
402
- [MO_LEUL] = LDUW_LE,
403
- [MO_LESL] = LDSW_LE,
404
- [MO_LEUQ] = LDX_LE,
405
- [MO_LESQ] = LDX_LE,
406
-};
407
-
408
-static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
409
- [MO_UB] = STB,
410
-
411
- [MO_BEUW] = STH,
412
- [MO_BEUL] = STW,
413
- [MO_BEUQ] = STX,
414
-
415
- [MO_LEUW] = STH_LE,
416
- [MO_LEUL] = STW_LE,
417
- [MO_LEUQ] = STX_LE,
418
-};
419
420
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
421
MemOpIdx oi, TCGType data_type)
177
{
422
{
178
int sgn_dvdnd = *phigh < 0;
423
- MemOp memop = get_memop(oi);
179
int sgn_divsr = divisor < 0;
424
- tcg_insn_unit *label_ptr;
180
- int overflow = 0;
425
+ static const int ld_opc[(MO_SSIZE | MO_BSWAP) + 1] = {
181
426
+ [MO_UB] = LDUB,
182
if (sgn_dvdnd) {
427
+ [MO_SB] = LDSB,
183
*plow = ~(*plow);
428
+ [MO_UB | MO_LE] = LDUB,
184
@@ -XXX,XX +XXX,XX @@ int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
429
+ [MO_SB | MO_LE] = LDSB,
185
divisor = 0 - divisor;
430
431
-#ifdef CONFIG_SOFTMMU
432
- unsigned memi = get_mmuidx(oi);
433
- TCGReg addrz;
434
- const tcg_insn_unit *func;
435
+ [MO_BEUW] = LDUH,
436
+ [MO_BESW] = LDSH,
437
+ [MO_BEUL] = LDUW,
438
+ [MO_BESL] = LDSW,
439
+ [MO_BEUQ] = LDX,
440
+ [MO_BESQ] = LDX,
441
442
- addrz = tcg_out_tlb_load(s, addr, memi, memop,
443
- offsetof(CPUTLBEntry, addr_read));
444
+ [MO_LEUW] = LDUH_LE,
445
+ [MO_LESW] = LDSH_LE,
446
+ [MO_LEUL] = LDUW_LE,
447
+ [MO_LESL] = LDSW_LE,
448
+ [MO_LEUQ] = LDX_LE,
449
+ [MO_LESQ] = LDX_LE,
450
+ };
451
452
- /* The fast path is exactly one insn. Thus we can perform the
453
- entire TLB Hit in the (annulled) delay slot of the branch
454
- over the TLB Miss case. */
455
+ TCGLabelQemuLdst *ldst;
456
+ HostAddress h;
457
458
- /* beq,a,pt %[xi]cc, label0 */
459
- label_ptr = s->code_ptr;
460
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
461
- | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
462
- /* delay slot */
463
- tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
464
- qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
465
+ ldst = prepare_host_addr(s, &h, addr, oi, true);
466
467
- /* TLB Miss. */
468
+ tcg_out_ldst_rr(s, data, h.base, h.index,
469
+ ld_opc[get_memop(oi) & (MO_BSWAP | MO_SSIZE)]);
470
471
- tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
472
-
473
- /* We use the helpers to extend SB and SW data, leaving the case
474
- of SL needing explicit extending below. */
475
- if ((memop & MO_SSIZE) == MO_SL) {
476
- func = qemu_ld_trampoline[MO_UL];
477
- } else {
478
- func = qemu_ld_trampoline[memop & MO_SSIZE];
479
+ if (ldst) {
480
+ ldst->type = data_type;
481
+ ldst->datalo_reg = data;
482
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
186
}
483
}
187
484
- tcg_debug_assert(func != NULL);
188
- overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
485
- tcg_out_call_nodelay(s, func, false);
189
+ divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
486
- /* delay slot */
190
487
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi);
191
if (sgn_dvdnd ^ sgn_divsr) {
488
-
192
*plow = 0 - *plow;
489
- /* We let the helper sign-extend SB and SW, but leave SL for here. */
490
- if ((memop & MO_SSIZE) == MO_SL) {
491
- tcg_out_ext32s(s, data, TCG_REG_O0);
492
- } else {
493
- tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
494
- }
495
-
496
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
497
-#else
498
- TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0);
499
- unsigned a_bits = get_alignment_bits(memop);
500
- unsigned s_bits = memop & MO_SIZE;
501
- unsigned t_bits;
502
-
503
- if (TARGET_LONG_BITS == 32) {
504
- tcg_out_ext32u(s, TCG_REG_T1, addr);
505
- addr = TCG_REG_T1;
506
- }
507
-
508
- /*
509
- * Normal case: alignment equal to access size.
510
- */
511
- if (a_bits == s_bits) {
512
- tcg_out_ldst_rr(s, data, addr, index,
513
- qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
514
- return;
515
- }
516
-
517
- /*
518
- * Test for at least natural alignment, and assume most accesses
519
- * will be aligned -- perform a straight load in the delay slot.
520
- * This is required to preserve atomicity for aligned accesses.
521
- */
522
- t_bits = MAX(a_bits, s_bits);
523
- tcg_debug_assert(t_bits < 13);
524
- tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
525
-
526
- /* beq,a,pt %icc, label */
527
- label_ptr = s->code_ptr;
528
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
529
- /* delay slot */
530
- tcg_out_ldst_rr(s, data, addr, index,
531
- qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]);
532
-
533
- if (a_bits >= s_bits) {
534
- /*
535
- * Overalignment: A successful alignment test will perform the memory
536
- * operation in the delay slot, and failure need only invoke the
537
- * handler for SIGBUS.
538
- */
539
- tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false);
540
- /* delay slot -- move to low part of argument reg */
541
- tcg_out_mov_delay(s, TCG_REG_O1, addr);
542
- } else {
543
- /* Underalignment: load by pieces of minimum alignment. */
544
- int ld_opc, a_size, s_size, i;
545
-
546
- /*
547
- * Force full address into T1 early; avoids problems with
548
- * overlap between @addr and @data.
549
- */
550
- tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
551
-
552
- a_size = 1 << a_bits;
553
- s_size = 1 << s_bits;
554
- if ((memop & MO_BSWAP) == MO_BE) {
555
- ld_opc = qemu_ld_opc[a_bits | MO_BE | (memop & MO_SIGN)];
556
- tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
557
- ld_opc = qemu_ld_opc[a_bits | MO_BE];
558
- for (i = a_size; i < s_size; i += a_size) {
559
- tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
560
- tcg_out_arithi(s, data, data, a_size, SHIFT_SLLX);
561
- tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
562
- }
563
- } else if (a_bits == 0) {
564
- ld_opc = LDUB;
565
- tcg_out_ldst(s, data, TCG_REG_T1, 0, ld_opc);
566
- for (i = a_size; i < s_size; i += a_size) {
567
- if ((memop & MO_SIGN) && i == s_size - a_size) {
568
- ld_opc = LDSB;
569
- }
570
- tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, ld_opc);
571
- tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
572
- tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
573
- }
574
- } else {
575
- ld_opc = qemu_ld_opc[a_bits | MO_LE];
576
- tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, ld_opc);
577
- for (i = a_size; i < s_size; i += a_size) {
578
- tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
579
- if ((memop & MO_SIGN) && i == s_size - a_size) {
580
- ld_opc = qemu_ld_opc[a_bits | MO_LE | MO_SIGN];
581
- }
582
- tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, ld_opc);
583
- tcg_out_arithi(s, TCG_REG_T2, TCG_REG_T2, i * 8, SHIFT_SLLX);
584
- tcg_out_arith(s, data, data, TCG_REG_T2, ARITH_OR);
585
- }
586
- }
587
- }
588
-
589
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
590
-#endif /* CONFIG_SOFTMMU */
591
}
592
593
static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
594
MemOpIdx oi, TCGType data_type)
595
{
596
- MemOp memop = get_memop(oi);
597
- tcg_insn_unit *label_ptr;
598
+ static const int st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
599
+ [MO_UB] = STB,
600
601
-#ifdef CONFIG_SOFTMMU
602
- unsigned memi = get_mmuidx(oi);
603
- TCGReg addrz;
604
- const tcg_insn_unit *func;
605
+ [MO_BEUW] = STH,
606
+ [MO_BEUL] = STW,
607
+ [MO_BEUQ] = STX,
608
609
- addrz = tcg_out_tlb_load(s, addr, memi, memop,
610
- offsetof(CPUTLBEntry, addr_write));
611
+ [MO_LEUW] = STH_LE,
612
+ [MO_LEUL] = STW_LE,
613
+ [MO_LEUQ] = STX_LE,
614
+ };
615
616
- /* The fast path is exactly one insn. Thus we can perform the entire
617
- TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
618
- /* beq,a,pt %[xi]cc, label0 */
619
- label_ptr = s->code_ptr;
620
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
621
- | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
622
- /* delay slot */
623
- tcg_out_ldst_rr(s, data, addrz, TCG_REG_O1,
624
- qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
625
+ TCGLabelQemuLdst *ldst;
626
+ HostAddress h;
627
628
- /* TLB Miss. */
629
+ ldst = prepare_host_addr(s, &h, addr, oi, false);
630
631
- tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
632
- tcg_out_movext(s, (memop & MO_SIZE) == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
633
- TCG_REG_O2, data_type, memop & MO_SIZE, data);
634
+ tcg_out_ldst_rr(s, data, h.base, h.index,
635
+ st_opc[get_memop(oi) & (MO_BSWAP | MO_SIZE)]);
636
637
- func = qemu_st_trampoline[memop & MO_SIZE];
638
- tcg_debug_assert(func != NULL);
639
- tcg_out_call_nodelay(s, func, false);
640
- /* delay slot */
641
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi);
642
-
643
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
644
-#else
645
- TCGReg index = (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0);
646
- unsigned a_bits = get_alignment_bits(memop);
647
- unsigned s_bits = memop & MO_SIZE;
648
- unsigned t_bits;
649
-
650
- if (TARGET_LONG_BITS == 32) {
651
- tcg_out_ext32u(s, TCG_REG_T1, addr);
652
- addr = TCG_REG_T1;
653
+ if (ldst) {
654
+ ldst->type = data_type;
655
+ ldst->datalo_reg = data;
656
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
193
}
657
}
194
-
658
-
195
- if (!overflow) {
659
- /*
196
- if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
660
- * Normal case: alignment equal to access size.
197
- overflow = 1;
661
- */
662
- if (a_bits == s_bits) {
663
- tcg_out_ldst_rr(s, data, addr, index,
664
- qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
665
- return;
666
- }
667
-
668
- /*
669
- * Test for at least natural alignment, and assume most accesses
670
- * will be aligned -- perform a straight store in the delay slot.
671
- * This is required to preserve atomicity for aligned accesses.
672
- */
673
- t_bits = MAX(a_bits, s_bits);
674
- tcg_debug_assert(t_bits < 13);
675
- tcg_out_arithi(s, TCG_REG_G0, addr, (1u << t_bits) - 1, ARITH_ANDCC);
676
-
677
- /* beq,a,pt %icc, label */
678
- label_ptr = s->code_ptr;
679
- tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT | BPCC_ICC, 0);
680
- /* delay slot */
681
- tcg_out_ldst_rr(s, data, addr, index,
682
- qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]);
683
-
684
- if (a_bits >= s_bits) {
685
- /*
686
- * Overalignment: A successful alignment test will perform the memory
687
- * operation in the delay slot, and failure need only invoke the
688
- * handler for SIGBUS.
689
- */
690
- tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false);
691
- /* delay slot -- move to low part of argument reg */
692
- tcg_out_mov_delay(s, TCG_REG_O1, addr);
693
- } else {
694
- /* Underalignment: store by pieces of minimum alignment. */
695
- int st_opc, a_size, s_size, i;
696
-
697
- /*
698
- * Force full address into T1 early; avoids problems with
699
- * overlap between @addr and @data.
700
- */
701
- tcg_out_arith(s, TCG_REG_T1, addr, index, ARITH_ADD);
702
-
703
- a_size = 1 << a_bits;
704
- s_size = 1 << s_bits;
705
- if ((memop & MO_BSWAP) == MO_BE) {
706
- st_opc = qemu_st_opc[a_bits | MO_BE];
707
- for (i = 0; i < s_size; i += a_size) {
708
- TCGReg d = data;
709
- int shift = (s_size - a_size - i) * 8;
710
- if (shift) {
711
- d = TCG_REG_T2;
712
- tcg_out_arithi(s, d, data, shift, SHIFT_SRLX);
713
- }
714
- tcg_out_ldst(s, d, TCG_REG_T1, i, st_opc);
715
- }
716
- } else if (a_bits == 0) {
717
- tcg_out_ldst(s, data, TCG_REG_T1, 0, STB);
718
- for (i = 1; i < s_size; i++) {
719
- tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX);
720
- tcg_out_ldst(s, TCG_REG_T2, TCG_REG_T1, i, STB);
721
- }
722
- } else {
723
- /* Note that ST*A with immediate asi must use indexed address. */
724
- st_opc = qemu_st_opc[a_bits + MO_LE];
725
- tcg_out_ldst_rr(s, data, TCG_REG_T1, TCG_REG_G0, st_opc);
726
- for (i = a_size; i < s_size; i += a_size) {
727
- tcg_out_arithi(s, TCG_REG_T2, data, i * 8, SHIFT_SRLX);
728
- tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, a_size, ARITH_ADD);
729
- tcg_out_ldst_rr(s, TCG_REG_T2, TCG_REG_T1, TCG_REG_G0, st_opc);
730
- }
198
- }
731
- }
199
- }
732
- }
200
-
733
-
201
- return overflow;
734
- *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr));
735
-#endif /* CONFIG_SOFTMMU */
202
}
736
}
203
#endif
737
204
738
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
739
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
740
case INDEX_op_extu_i32_i64:
741
case INDEX_op_extrl_i64_i32:
742
case INDEX_op_extrh_i64_i32:
743
+ case INDEX_op_qemu_ld_i32:
744
+ case INDEX_op_qemu_ld_i64:
745
return C_O1_I1(r, r);
746
747
case INDEX_op_st8_i32:
748
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
749
case INDEX_op_st_i32:
750
case INDEX_op_st32_i64:
751
case INDEX_op_st_i64:
752
+ case INDEX_op_qemu_st_i32:
753
+ case INDEX_op_qemu_st_i64:
754
return C_O0_I2(rZ, r);
755
756
case INDEX_op_add_i32:
757
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
758
case INDEX_op_muluh_i64:
759
return C_O1_I2(r, r, r);
760
761
- case INDEX_op_qemu_ld_i32:
762
- case INDEX_op_qemu_ld_i64:
763
- return C_O1_I1(r, s);
764
- case INDEX_op_qemu_st_i32:
765
- case INDEX_op_qemu_st_i64:
766
- return C_O0_I2(sZ, s);
767
-
768
default:
769
g_assert_not_reached();
770
}
205
--
771
--
206
2.25.1
772
2.34.1
207
208
New patch
1
These functions are now unused.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
include/tcg/tcg-ldst.h | 6 ------
7
accel/tcg/user-exec.c | 10 ----------
8
2 files changed, 16 deletions(-)
9
10
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/include/tcg/tcg-ldst.h
13
+++ b/include/tcg/tcg-ldst.h
14
@@ -XXX,XX +XXX,XX @@ void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
15
void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
16
MemOpIdx oi, uintptr_t retaddr);
17
18
-#ifdef CONFIG_USER_ONLY
19
-
20
-G_NORETURN void helper_unaligned_ld(CPUArchState *env, target_ulong addr);
21
-G_NORETURN void helper_unaligned_st(CPUArchState *env, target_ulong addr);
22
-
23
-#endif /* CONFIG_USER_ONLY */
24
#endif /* TCG_LDST_H */
25
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/accel/tcg/user-exec.c
28
+++ b/accel/tcg/user-exec.c
29
@@ -XXX,XX +XXX,XX @@ void page_reset_target_data(target_ulong start, target_ulong last) { }
30
31
/* The softmmu versions of these helpers are in cputlb.c. */
32
33
-void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
34
-{
35
- cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
36
-}
37
-
38
-void helper_unaligned_st(CPUArchState *env, target_ulong addr)
39
-{
40
- cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
41
-}
42
-
43
static void *cpu_mmu_lookup(CPUArchState *env, abi_ptr addr,
44
MemOp mop, uintptr_t ra, MMUAccessType type)
45
{
46
--
47
2.34.1
1
Rename to fold_addsub2.
1
This should be true of all loongarch64 hosts running Linux.
2
Use Int128 to implement the wider operation.
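
As background for the Int128 rewrite (an illustrative sketch, not the optimizer code): the double-word add being folded is nothing more than two 64-bit limb additions with a carry between them, which Int128 packages up directly.

    #include <stdint.h>

    /* Hypothetical helper: add the 128-bit value bh:bl into hi:lo,
     * the same arithmetic int128_add() performs for the folded add2. */
    static void add128(uint64_t *lo, uint64_t *hi, uint64_t bl, uint64_t bh)
    {
        uint64_t l = *lo + bl;
        *hi += bh + (l < *lo);  /* carry out of the low limb */
        *lo = l;
    }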
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
5
---
9
tcg/optimize.c | 65 ++++++++++++++++++++++++++++++++++----------------
6
tcg/loongarch64/tcg-target.c.inc | 9 +++++++++
10
1 file changed, 44 insertions(+), 21 deletions(-)
7
1 file changed, 9 insertions(+)
11
8
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
11
--- a/tcg/loongarch64/tcg-target.c.inc
15
+++ b/tcg/optimize.c
12
+++ b/tcg/loongarch64/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@
13
@@ -XXX,XX +XXX,XX @@
17
*/
14
*/
18
15
19
#include "qemu/osdep.h"
16
#include "../tcg-ldst.c.inc"
20
+#include "qemu/int128.h"
17
+#include <asm/hwcap.h>
21
#include "tcg/tcg-op.h"
18
22
#include "tcg-internal.h"
19
#ifdef CONFIG_DEBUG_TCG
23
20
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
24
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
21
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
25
return false;
22
26
}
23
static void tcg_target_init(TCGContext *s)
27
28
-static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
29
+static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
30
{
24
{
31
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
25
+ unsigned long hwcap = qemu_getauxval(AT_HWCAP);
32
arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
33
- uint32_t al = arg_info(op->args[2])->val;
34
- uint32_t ah = arg_info(op->args[3])->val;
35
- uint32_t bl = arg_info(op->args[4])->val;
36
- uint32_t bh = arg_info(op->args[5])->val;
37
- uint64_t a = ((uint64_t)ah << 32) | al;
38
- uint64_t b = ((uint64_t)bh << 32) | bl;
39
+ uint64_t al = arg_info(op->args[2])->val;
40
+ uint64_t ah = arg_info(op->args[3])->val;
41
+ uint64_t bl = arg_info(op->args[4])->val;
42
+ uint64_t bh = arg_info(op->args[5])->val;
43
TCGArg rl, rh;
44
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
45
+ TCGOp *op2;
46
47
- if (add) {
48
- a += b;
49
+ if (ctx->type == TCG_TYPE_I32) {
50
+ uint64_t a = deposit64(al, 32, 32, ah);
51
+ uint64_t b = deposit64(bl, 32, 32, bh);
52
+
26
+
53
+ if (add) {
27
+ /* Server and desktop class cpus have UAL; embedded cpus do not. */
54
+ a += b;
28
+ if (!(hwcap & HWCAP_LOONGARCH_UAL)) {
55
+ } else {
29
+ error_report("TCG: unaligned access support required; exiting");
56
+ a -= b;
30
+ exit(EXIT_FAILURE);
57
+ }
31
+ }
58
+
32
+
59
+ al = sextract64(a, 0, 32);
33
tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
60
+ ah = sextract64(a, 32, 32);
34
tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
61
} else {
35
62
- a -= b;
63
+ Int128 a = int128_make128(al, ah);
64
+ Int128 b = int128_make128(bl, bh);
65
+
66
+ if (add) {
67
+ a = int128_add(a, b);
68
+ } else {
69
+ a = int128_sub(a, b);
70
+ }
71
+
72
+ al = int128_getlo(a);
73
+ ah = int128_gethi(a);
74
}
75
76
rl = op->args[0];
77
rh = op->args[1];
78
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
79
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
80
+
81
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
82
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
83
+
84
+ tcg_opt_gen_movi(ctx, op, rl, al);
85
+ tcg_opt_gen_movi(ctx, op2, rh, ah);
86
return true;
87
}
88
return false;
89
}
90
91
-static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
92
+static bool fold_add2(OptContext *ctx, TCGOp *op)
93
{
94
- return fold_addsub2_i32(ctx, op, true);
95
+ return fold_addsub2(ctx, op, true);
96
}
97
98
static bool fold_and(OptContext *ctx, TCGOp *op)
99
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
100
return false;
101
}
102
103
-static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
104
+static bool fold_sub2(OptContext *ctx, TCGOp *op)
105
{
106
- return fold_addsub2_i32(ctx, op, false);
107
+ return fold_addsub2(ctx, op, false);
108
}
109
110
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
CASE_OP_32_64_VEC(add):
113
done = fold_add(&ctx, op);
114
break;
115
- case INDEX_op_add2_i32:
116
- done = fold_add2_i32(&ctx, op);
117
+ CASE_OP_32_64(add2):
118
+ done = fold_add2(&ctx, op);
119
break;
120
CASE_OP_32_64_VEC(and):
121
done = fold_and(&ctx, op);
122
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
123
CASE_OP_32_64_VEC(sub):
124
done = fold_sub(&ctx, op);
125
break;
126
- case INDEX_op_sub2_i32:
127
- done = fold_sub2_i32(&ctx, op);
128
+ CASE_OP_32_64(sub2):
129
+ done = fold_sub2(&ctx, op);
130
break;
131
CASE_OP_32_64_VEC(xor):
132
done = fold_xor(&ctx, op);
133
--
36
--
134
2.25.1
37
2.34.1
135
136
New patch
Test the final byte of an unaligned access.
Use BSTRINS.D to clear the range of bits, rather than AND.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target.c.inc | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

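As a rough illustration of the scheme in this message (not part of the patch;
the helper name, the page_bits parameter and the use of plain C are invented
for the sketch, <stdint.h> assumed):

    /*
     * Sketch only: the value compared against the TLB entry.
     * a_bits/s_bits are the alignment and size (log2) of the access.
     */
    static uint64_t tlb_compare_value(uint64_t addr, unsigned a_bits,
                                      unsigned s_bits, unsigned page_bits)
    {
        uint64_t last = addr;

        if (a_bits < s_bits) {
            /* Step to the last byte of the access: s_mask - a_mask. */
            last += (1u << s_bits) - (1u << a_bits);
        }
        /* Clear bits [a_bits, page_bits), as BSTRINS.D rd, $zero, ... does. */
        uint64_t clear = ((uint64_t)1 << page_bits) - ((uint64_t)1 << a_bits);
        return last & ~clear;
    }

Keeping the low a_bits in the comparator is what routes misaligned accesses to
the slow path, since the TLB comparator holds a page-aligned address.
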
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/loongarch64/tcg-target.c.inc
13
+++ b/tcg/loongarch64/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
15
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
16
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
17
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
18
- tcg_target_long compare_mask;
19
20
ldst = new_ldst_label(s);
21
ldst->is_ld = is_ld;
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
23
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
24
offsetof(CPUTLBEntry, addend));
25
26
- /* We don't support unaligned accesses. */
27
+ /*
28
+ * For aligned accesses, we check the first byte and include the alignment
29
+ * bits within the address. For unaligned access, we check that we don't
30
+ * cross pages using the address of the last byte of the access.
31
+ */
32
if (a_bits < s_bits) {
33
- a_bits = s_bits;
34
+ unsigned a_mask = (1u << a_bits) - 1;
35
+ unsigned s_mask = (1u << s_bits) - 1;
36
+ tcg_out_addi(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
37
+ } else {
38
+ tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg);
39
}
40
- /* Clear the non-page, non-alignment bits from the address. */
41
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
42
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
43
- tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
44
+ tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
45
+ a_bits, TARGET_PAGE_BITS - 1);
46
47
/* Compare masked address with the TLB entry. */
48
ldst->label_ptr[0] = s->code_ptr;
49
--
50
2.34.1
New patch
The system is required to emulate unaligned accesses, even if the
hardware does not support it. The resulting trap may or may not
be more efficient than the qemu slow path. There are linux kernel
patches in flight to allow userspace to query hardware support;
we can re-evaluate whether to enable this by default after that.

In the meantime, softmmu now matches useronly, where we already
assumed that unaligned accesses are supported.

Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/riscv/tcg-target.c.inc | 48 ++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 20 deletions(-)

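A side note on the ANDI-vs-constant choice visible in the hunk below: RISC-V
I-type immediates are signed 12-bit values, which is what the
sextreg(compare_mask, 0, 12) test checks. A stand-alone equivalent, given
only as a sketch (<stdint.h>/<stdbool.h> assumed):

    /* Sketch only: does `value` fit RISC-V's signed 12-bit immediate? */
    static bool fits_simm12(int64_t value)
    {
        return value >= -0x800 && value <= 0x7ff;
    }
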
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
17
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/riscv/tcg-target.c.inc
19
+++ b/tcg/riscv/tcg-target.c.inc
20
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
21
22
#ifdef CONFIG_SOFTMMU
23
unsigned s_bits = opc & MO_SIZE;
24
+ unsigned s_mask = (1u << s_bits) - 1;
25
int mem_index = get_mmuidx(oi);
26
int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
27
int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
28
int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
29
- TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
30
- tcg_target_long compare_mask;
31
+ int compare_mask;
32
+ TCGReg addr_adj;
33
34
ldst = new_ldst_label(s);
35
ldst->is_ld = is_ld;
36
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
37
38
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
39
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
40
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
41
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
42
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
43
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
44
45
tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
46
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
47
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
48
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
49
50
+ /*
51
+ * For aligned accesses, we check the first byte and include the alignment
52
+ * bits within the address. For unaligned access, we check that we don't
53
+ * cross pages using the address of the last byte of the access.
54
+ */
55
+ addr_adj = addr_reg;
56
+ if (a_bits < s_bits) {
57
+ addr_adj = TCG_REG_TMP0;
58
+ tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
59
+ addr_adj, addr_reg, s_mask - a_mask);
60
+ }
61
+ compare_mask = TARGET_PAGE_MASK | a_mask;
62
+ if (compare_mask == sextreg(compare_mask, 0, 12)) {
63
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
64
+ } else {
65
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
66
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_adj);
67
+ }
68
+
69
/* Load the tlb comparator and the addend. */
70
tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
71
is_ld ? offsetof(CPUTLBEntry, addr_read)
72
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
73
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
74
offsetof(CPUTLBEntry, addend));
75
76
- /* We don't support unaligned accesses. */
77
- if (a_bits < s_bits) {
78
- a_bits = s_bits;
79
- }
80
- /* Clear the non-page, non-alignment bits from the address. */
81
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | a_mask;
82
- if (compare_mask == sextreg(compare_mask, 0, 12)) {
83
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, compare_mask);
84
- } else {
85
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
86
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
87
- }
88
-
89
/* Compare masked address with the TLB entry. */
90
ldst->label_ptr[0] = s->code_ptr;
91
tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
92
93
/* TLB Hit - translate address using addend. */
94
+ addr_adj = addr_reg;
95
if (TARGET_LONG_BITS == 32) {
96
- tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
97
- addr_reg = TCG_REG_TMP0;
98
+ addr_adj = TCG_REG_TMP0;
99
+ tcg_out_ext32u(s, addr_adj, addr_reg);
100
}
101
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_reg);
102
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_adj);
103
*pbase = TCG_REG_TMP0;
104
#else
105
if (a_mask) {
106
--
107
2.34.1
1
This puts the separate mb optimization into the same framework
1
Replace the unparameterized TCG_TARGET_HAS_MEMORY_BSWAP macro
2
as the others. While fold_qemu_{ld,st} are currently identical,
2
with a function with a memop argument.
3
that won't last as more code gets moved.
4
3
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
tcg/optimize.c | 89 +++++++++++++++++++++++++++++---------------------
7
tcg/aarch64/tcg-target.h | 1 -
10
1 file changed, 51 insertions(+), 38 deletions(-)
8
tcg/arm/tcg-target.h | 1 -
9
tcg/i386/tcg-target.h | 3 ---
10
tcg/loongarch64/tcg-target.h | 2 --
11
tcg/mips/tcg-target.h | 2 --
12
tcg/ppc/tcg-target.h | 1 -
13
tcg/riscv/tcg-target.h | 2 --
14
tcg/s390x/tcg-target.h | 2 --
15
tcg/sparc64/tcg-target.h | 1 -
16
tcg/tcg-internal.h | 2 ++
17
tcg/tci/tcg-target.h | 2 --
18
tcg/tcg-op.c | 20 +++++++++++---------
19
tcg/aarch64/tcg-target.c.inc | 5 +++++
20
tcg/arm/tcg-target.c.inc | 5 +++++
21
tcg/i386/tcg-target.c.inc | 5 +++++
22
tcg/loongarch64/tcg-target.c.inc | 5 +++++
23
tcg/mips/tcg-target.c.inc | 5 +++++
24
tcg/ppc/tcg-target.c.inc | 5 +++++
25
tcg/riscv/tcg-target.c.inc | 5 +++++
26
tcg/s390x/tcg-target.c.inc | 5 +++++
27
tcg/sparc64/tcg-target.c.inc | 5 +++++
28
tcg/tci/tcg-target.c.inc | 5 +++++
29
22 files changed, 63 insertions(+), 26 deletions(-)
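
For the tcg_target_has_memory_bswap() conversion described in the second
message above, a sketch of what the per-memop hook can express that a single
macro could not (the size-based rule is invented for illustration and is not
taken from any backend in this series):

    /* Sketch only: a backend able to byte-swap 32- and 64-bit accesses
     * in its load/store instructions, but not 128-bit ones. */
    bool tcg_target_has_memory_bswap(MemOp memop)
    {
        return (memop & MO_SIZE) <= MO_64;
    }
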
11
30
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
31
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
13
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
33
--- a/tcg/aarch64/tcg-target.h
15
+++ b/tcg/optimize.c
34
+++ b/tcg/aarch64/tcg-target.h
16
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
35
@@ -XXX,XX +XXX,XX @@ extern bool have_lse2;
17
return true;
36
#define TCG_TARGET_HAS_cmpsel_vec 0
37
38
#define TCG_TARGET_DEFAULT_MO (0)
39
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
40
#define TCG_TARGET_NEED_LDST_LABELS
41
#define TCG_TARGET_NEED_POOL_LABELS
42
43
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
44
index XXXXXXX..XXXXXXX 100644
45
--- a/tcg/arm/tcg-target.h
46
+++ b/tcg/arm/tcg-target.h
47
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
48
#define TCG_TARGET_HAS_cmpsel_vec 0
49
50
#define TCG_TARGET_DEFAULT_MO (0)
51
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
52
#define TCG_TARGET_NEED_LDST_LABELS
53
#define TCG_TARGET_NEED_POOL_LABELS
54
55
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
56
index XXXXXXX..XXXXXXX 100644
57
--- a/tcg/i386/tcg-target.h
58
+++ b/tcg/i386/tcg-target.h
59
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
60
#include "tcg/tcg-mo.h"
61
62
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
63
-
64
-#define TCG_TARGET_HAS_MEMORY_BSWAP have_movbe
65
-
66
#define TCG_TARGET_NEED_LDST_LABELS
67
#define TCG_TARGET_NEED_POOL_LABELS
68
69
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
70
index XXXXXXX..XXXXXXX 100644
71
--- a/tcg/loongarch64/tcg-target.h
72
+++ b/tcg/loongarch64/tcg-target.h
73
@@ -XXX,XX +XXX,XX @@ typedef enum {
74
75
#define TCG_TARGET_NEED_LDST_LABELS
76
77
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
78
-
79
#endif /* LOONGARCH_TCG_TARGET_H */
80
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
81
index XXXXXXX..XXXXXXX 100644
82
--- a/tcg/mips/tcg-target.h
83
+++ b/tcg/mips/tcg-target.h
84
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
85
#endif
86
87
#define TCG_TARGET_DEFAULT_MO 0
88
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
89
-
90
#define TCG_TARGET_NEED_LDST_LABELS
91
92
#endif
93
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
94
index XXXXXXX..XXXXXXX 100644
95
--- a/tcg/ppc/tcg-target.h
96
+++ b/tcg/ppc/tcg-target.h
97
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
98
#define TCG_TARGET_HAS_cmpsel_vec 0
99
100
#define TCG_TARGET_DEFAULT_MO (0)
101
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
102
#define TCG_TARGET_NEED_LDST_LABELS
103
#define TCG_TARGET_NEED_POOL_LABELS
104
105
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
106
index XXXXXXX..XXXXXXX 100644
107
--- a/tcg/riscv/tcg-target.h
108
+++ b/tcg/riscv/tcg-target.h
109
@@ -XXX,XX +XXX,XX @@ typedef enum {
110
#define TCG_TARGET_NEED_LDST_LABELS
111
#define TCG_TARGET_NEED_POOL_LABELS
112
113
-#define TCG_TARGET_HAS_MEMORY_BSWAP 0
114
-
115
#endif
116
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
117
index XXXXXXX..XXXXXXX 100644
118
--- a/tcg/s390x/tcg-target.h
119
+++ b/tcg/s390x/tcg-target.h
120
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
121
#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_BY_REF
122
#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_BY_REF
123
124
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
125
-
126
#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
127
#define TCG_TARGET_NEED_LDST_LABELS
128
#define TCG_TARGET_NEED_POOL_LABELS
129
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
130
index XXXXXXX..XXXXXXX 100644
131
--- a/tcg/sparc64/tcg-target.h
132
+++ b/tcg/sparc64/tcg-target.h
133
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
134
#define TCG_AREG0 TCG_REG_I0
135
136
#define TCG_TARGET_DEFAULT_MO (0)
137
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
138
#define TCG_TARGET_NEED_LDST_LABELS
139
#define TCG_TARGET_NEED_POOL_LABELS
140
141
diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
142
index XXXXXXX..XXXXXXX 100644
143
--- a/tcg/tcg-internal.h
144
+++ b/tcg/tcg-internal.h
145
@@ -XXX,XX +XXX,XX @@ static inline TCGv_i64 TCGV128_HIGH(TCGv_i128 t)
146
return temp_tcgv_i64(tcgv_i128_temp(t) + o);
18
}
147
}
19
148
20
+static bool fold_mb(OptContext *ctx, TCGOp *op)
149
+bool tcg_target_has_memory_bswap(MemOp memop);
21
+{
150
+
22
+ /* Eliminate duplicate and redundant fence instructions. */
151
#endif /* TCG_INTERNAL_H */
23
+ if (ctx->prev_mb) {
152
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
24
+ /*
153
index XXXXXXX..XXXXXXX 100644
25
+ * Merge two barriers of the same type into one,
154
--- a/tcg/tci/tcg-target.h
26
+ * or a weaker barrier into a stronger one,
155
+++ b/tcg/tci/tcg-target.h
27
+ * or two weaker barriers into a stronger one.
156
@@ -XXX,XX +XXX,XX @@ typedef enum {
28
+ * mb X; mb Y => mb X|Y
157
We prefer consistency across hosts on this. */
29
+ * mb; strl => mb; st
158
#define TCG_TARGET_DEFAULT_MO (0)
30
+ * ldaq; mb => ld; mb
159
31
+ * ldaq; strl => ld; mb; st
160
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
32
+ * Other combinations are also merged into a strong
161
-
33
+ * barrier. This is stricter than specified but for
162
#endif /* TCG_TARGET_H */
34
+ * the purposes of TCG is better than not optimizing.
163
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
35
+ */
164
index XXXXXXX..XXXXXXX 100644
36
+ ctx->prev_mb->args[0] |= op->args[0];
165
--- a/tcg/tcg-op.c
37
+ tcg_op_remove(ctx->tcg, op);
166
+++ b/tcg/tcg-op.c
38
+ } else {
167
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
39
+ ctx->prev_mb = op;
168
oi = make_memop_idx(memop, idx);
169
170
orig_memop = memop;
171
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
172
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
173
memop &= ~MO_BSWAP;
174
/* The bswap primitive benefits from zero-extended input. */
175
if ((memop & MO_SSIZE) == MO_SW) {
176
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
177
memop = tcg_canonicalize_memop(memop, 0, 1);
178
oi = make_memop_idx(memop, idx);
179
180
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
181
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
182
swap = tcg_temp_ebb_new_i32();
183
switch (memop & MO_SIZE) {
184
case MO_16:
185
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
186
oi = make_memop_idx(memop, idx);
187
188
orig_memop = memop;
189
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
190
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
191
memop &= ~MO_BSWAP;
192
/* The bswap primitive benefits from zero-extended input. */
193
if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
194
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
195
memop = tcg_canonicalize_memop(memop, 1, 1);
196
oi = make_memop_idx(memop, idx);
197
198
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (memop & MO_BSWAP)) {
199
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
200
swap = tcg_temp_ebb_new_i64();
201
switch (memop & MO_SIZE) {
202
case MO_16:
203
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
204
tcg_debug_assert((orig & MO_SIZE) == MO_128);
205
tcg_debug_assert((orig & MO_SIGN) == 0);
206
207
- /* Use a memory ordering implemented by the host. */
208
- if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
209
- mop_1 &= ~MO_BSWAP;
210
- }
211
-
212
/* Reduce the size to 64-bit. */
213
mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
214
215
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
216
default:
217
g_assert_not_reached();
218
}
219
+
220
+ /* Use a memory ordering implemented by the host. */
221
+ if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
222
+ mop_1 &= ~MO_BSWAP;
223
+ mop_2 &= ~MO_BSWAP;
40
+ }
224
+ }
225
+
226
ret[0] = mop_1;
227
ret[1] = mop_2;
228
}
229
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
230
index XXXXXXX..XXXXXXX 100644
231
--- a/tcg/aarch64/tcg-target.c.inc
232
+++ b/tcg/aarch64/tcg-target.c.inc
233
@@ -XXX,XX +XXX,XX @@ typedef struct {
234
TCGType index_ext;
235
} HostAddress;
236
237
+bool tcg_target_has_memory_bswap(MemOp memop)
238
+{
239
+ return false;
240
+}
241
+
242
static const TCGLdstHelperParam ldst_helper_param = {
243
.ntmp = 1, .tmp = { TCG_REG_TMP }
244
};
245
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
246
index XXXXXXX..XXXXXXX 100644
247
--- a/tcg/arm/tcg-target.c.inc
248
+++ b/tcg/arm/tcg-target.c.inc
249
@@ -XXX,XX +XXX,XX @@ typedef struct {
250
bool index_scratch;
251
} HostAddress;
252
253
+bool tcg_target_has_memory_bswap(MemOp memop)
254
+{
255
+ return false;
256
+}
257
+
258
static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
259
{
260
/* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
261
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
262
index XXXXXXX..XXXXXXX 100644
263
--- a/tcg/i386/tcg-target.c.inc
264
+++ b/tcg/i386/tcg-target.c.inc
265
@@ -XXX,XX +XXX,XX @@ typedef struct {
266
int seg;
267
} HostAddress;
268
269
+bool tcg_target_has_memory_bswap(MemOp memop)
270
+{
271
+ return have_movbe;
272
+}
273
+
274
/*
275
* Because i686 has no register parameters and because x86_64 has xchg
276
* to handle addr/data register overlap, we have placed all input arguments
277
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
278
index XXXXXXX..XXXXXXX 100644
279
--- a/tcg/loongarch64/tcg-target.c.inc
280
+++ b/tcg/loongarch64/tcg-target.c.inc
281
@@ -XXX,XX +XXX,XX @@ typedef struct {
282
TCGReg index;
283
} HostAddress;
284
285
+bool tcg_target_has_memory_bswap(MemOp memop)
286
+{
287
+ return false;
288
+}
289
+
290
/*
291
* For softmmu, perform the TLB load and compare.
292
* For useronly, perform any required alignment tests.
293
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
294
index XXXXXXX..XXXXXXX 100644
295
--- a/tcg/mips/tcg-target.c.inc
296
+++ b/tcg/mips/tcg-target.c.inc
297
@@ -XXX,XX +XXX,XX @@ typedef struct {
298
MemOp align;
299
} HostAddress;
300
301
+bool tcg_target_has_memory_bswap(MemOp memop)
302
+{
303
+ return false;
304
+}
305
+
306
/*
307
* For softmmu, perform the TLB load and compare.
308
* For useronly, perform any required alignment tests.
309
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
310
index XXXXXXX..XXXXXXX 100644
311
--- a/tcg/ppc/tcg-target.c.inc
312
+++ b/tcg/ppc/tcg-target.c.inc
313
@@ -XXX,XX +XXX,XX @@ typedef struct {
314
TCGReg index;
315
} HostAddress;
316
317
+bool tcg_target_has_memory_bswap(MemOp memop)
318
+{
41
+ return true;
319
+ return true;
42
+}
320
+}
43
+
321
+
44
+static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
322
/*
45
+{
323
* For softmmu, perform the TLB load and compare.
46
+ /* Opcodes that touch guest memory stop the mb optimization. */
324
* For useronly, perform any required alignment tests.
47
+ ctx->prev_mb = NULL;
325
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
48
+ return false;
326
index XXXXXXX..XXXXXXX 100644
49
+}
327
--- a/tcg/riscv/tcg-target.c.inc
50
+
328
+++ b/tcg/riscv/tcg-target.c.inc
51
+static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
329
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
52
+{
330
tcg_debug_assert(ok);
53
+ /* Opcodes that touch guest memory stop the mb optimization. */
331
}
54
+ ctx->prev_mb = NULL;
332
55
+ return false;
333
+bool tcg_target_has_memory_bswap(MemOp memop)
56
+}
334
+{
57
+
335
+ return false;
58
/* Propagate constants and copies, fold constant expressions. */
336
+}
59
void tcg_optimize(TCGContext *s)
337
+
338
/* We have three temps, we might as well expose them. */
339
static const TCGLdstHelperParam ldst_helper_param = {
340
.ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
341
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
342
index XXXXXXX..XXXXXXX 100644
343
--- a/tcg/s390x/tcg-target.c.inc
344
+++ b/tcg/s390x/tcg-target.c.inc
345
@@ -XXX,XX +XXX,XX @@ typedef struct {
346
int disp;
347
} HostAddress;
348
349
+bool tcg_target_has_memory_bswap(MemOp memop)
350
+{
351
+ return true;
352
+}
353
+
354
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
355
HostAddress h)
60
{
356
{
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
357
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
62
}
358
index XXXXXXX..XXXXXXX 100644
63
break;
359
--- a/tcg/sparc64/tcg-target.c.inc
64
360
+++ b/tcg/sparc64/tcg-target.c.inc
65
+ case INDEX_op_mb:
361
@@ -XXX,XX +XXX,XX @@ typedef struct {
66
+ done = fold_mb(&ctx, op);
362
TCGReg index;
67
+ break;
363
} HostAddress;
68
+ case INDEX_op_qemu_ld_i32:
364
69
+ case INDEX_op_qemu_ld_i64:
365
+bool tcg_target_has_memory_bswap(MemOp memop)
70
+ done = fold_qemu_ld(&ctx, op);
366
+{
71
+ break;
367
+ return true;
72
+ case INDEX_op_qemu_st_i32:
368
+}
73
+ case INDEX_op_qemu_st8_i32:
369
+
74
+ case INDEX_op_qemu_st_i64:
370
/*
75
+ done = fold_qemu_st(&ctx, op);
371
* For softmmu, perform the TLB load and compare.
76
+ break;
372
* For useronly, perform any required alignment tests.
77
+
373
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
78
default:
374
index XXXXXXX..XXXXXXX 100644
79
break;
375
--- a/tcg/tci/tcg-target.c.inc
80
}
376
+++ b/tcg/tci/tcg-target.c.inc
81
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
377
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
82
if (!done) {
378
static inline void tcg_target_qemu_prologue(TCGContext *s)
83
finish_folding(&ctx, op);
379
{
84
}
85
-
86
- /* Eliminate duplicate and redundant fence instructions. */
87
- if (ctx.prev_mb) {
88
- switch (opc) {
89
- case INDEX_op_mb:
90
- /* Merge two barriers of the same type into one,
91
- * or a weaker barrier into a stronger one,
92
- * or two weaker barriers into a stronger one.
93
- * mb X; mb Y => mb X|Y
94
- * mb; strl => mb; st
95
- * ldaq; mb => ld; mb
96
- * ldaq; strl => ld; mb; st
97
- * Other combinations are also merged into a strong
98
- * barrier. This is stricter than specified but for
99
- * the purposes of TCG is better than not optimizing.
100
- */
101
- ctx.prev_mb->args[0] |= op->args[0];
102
- tcg_op_remove(s, op);
103
- break;
104
-
105
- default:
106
- /* Opcodes that end the block stop the optimization. */
107
- if ((def->flags & TCG_OPF_BB_END) == 0) {
108
- break;
109
- }
110
- /* fallthru */
111
- case INDEX_op_qemu_ld_i32:
112
- case INDEX_op_qemu_ld_i64:
113
- case INDEX_op_qemu_st_i32:
114
- case INDEX_op_qemu_st8_i32:
115
- case INDEX_op_qemu_st_i64:
116
- /* Opcodes that touch guest memory stop the optimization. */
117
- ctx.prev_mb = NULL;
118
- break;
119
- }
120
- } else if (opc == INDEX_op_mb) {
121
- ctx.prev_mb = op;
122
- }
123
}
124
}
380
}
381
+
382
+bool tcg_target_has_memory_bswap(MemOp memop)
383
+{
384
+ return true;
385
+}
125
--
386
--
126
2.25.1
387
2.34.1
127
128
1
Recognize the identity function for low-part multiply.
1
Add opcodes for backend support for 128-bit memory operations.
2
2
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 3 ++-
7
docs/devel/tcg-ops.rst | 11 +++---
9
1 file changed, 2 insertions(+), 1 deletion(-)
8
include/tcg/tcg-opc.h | 8 +++++
9
tcg/aarch64/tcg-target.h | 2 ++
10
tcg/arm/tcg-target.h | 2 ++
11
tcg/i386/tcg-target.h | 2 ++
12
tcg/loongarch64/tcg-target.h | 1 +
13
tcg/mips/tcg-target.h | 2 ++
14
tcg/ppc/tcg-target.h | 2 ++
15
tcg/riscv/tcg-target.h | 2 ++
16
tcg/s390x/tcg-target.h | 2 ++
17
tcg/sparc64/tcg-target.h | 2 ++
18
tcg/tci/tcg-target.h | 2 ++
19
tcg/tcg-op.c | 69 ++++++++++++++++++++++++++++++++----
20
tcg/tcg.c | 6 ++++
21
14 files changed, 103 insertions(+), 10 deletions(-)
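
As a usage sketch for the opcodes added here (front-end side, inside a
translator; illustrative only, assuming a 64-bit host whose backend sets
TCG_TARGET_HAS_qemu_ldst_i128 -- tcg-op.c otherwise falls back to two 64-bit
accesses or a helper call):

    /* Sketch only: load a little-endian 128-bit guest value. */
    TCGv_i128 t = tcg_temp_new_i128();
    tcg_gen_qemu_ld_i128(t, addr, mmu_idx, MO_LE | MO_128);
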
10
22
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
23
diff --git a/docs/devel/tcg-ops.rst b/docs/devel/tcg-ops.rst
12
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
25
--- a/docs/devel/tcg-ops.rst
14
+++ b/tcg/optimize.c
26
+++ b/docs/devel/tcg-ops.rst
15
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
27
@@ -XXX,XX +XXX,XX @@ QEMU specific operations
16
static bool fold_mul(OptContext *ctx, TCGOp *op)
28
| This operation is optional. If the TCG backend does not implement the
29
goto_ptr opcode, emitting this op is equivalent to emitting exit_tb(0).
30
31
- * - qemu_ld_i32/i64 *t0*, *t1*, *flags*, *memidx*
32
+ * - qemu_ld_i32/i64/i128 *t0*, *t1*, *flags*, *memidx*
33
34
- qemu_st_i32/i64 *t0*, *t1*, *flags*, *memidx*
35
+ qemu_st_i32/i64/i128 *t0*, *t1*, *flags*, *memidx*
36
37
qemu_st8_i32 *t0*, *t1*, *flags*, *memidx*
38
39
- | Load data at the guest address *t1* into *t0*, or store data in *t0* at guest
40
- address *t1*. The _i32/_i64 size applies to the size of the input/output
41
+ address *t1*. The _i32/_i64/_i128 size applies to the size of the input/output
42
register *t0* only. The address *t1* is always sized according to the guest,
43
and the width of the memory operation is controlled by *flags*.
44
|
45
| Both *t0* and *t1* may be split into little-endian ordered pairs of registers
46
- if dealing with 64-bit quantities on a 32-bit host.
47
+ if dealing with 64-bit quantities on a 32-bit host, or 128-bit quantities on
48
+ a 64-bit host.
49
|
50
| The *memidx* selects the qemu tlb index to use (e.g. user or kernel access).
51
The flags are the MemOp bits, selecting the sign, width, and endianness
52
@@ -XXX,XX +XXX,XX @@ QEMU specific operations
53
| For a 32-bit host, qemu_ld/st_i64 is guaranteed to only be used with a
54
64-bit memory access specified in *flags*.
55
|
56
+ | For qemu_ld/st_i128, these are only supported for a 64-bit host.
57
+ |
58
| For i386, qemu_st8_i32 is exactly like qemu_st_i32, except the size of
59
the memory operation is known to be 8-bit. This allows the backend to
60
provide a different set of register constraints.
61
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
62
index XXXXXXX..XXXXXXX 100644
63
--- a/include/tcg/tcg-opc.h
64
+++ b/include/tcg/tcg-opc.h
65
@@ -XXX,XX +XXX,XX @@ DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
66
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
67
IMPL(TCG_TARGET_HAS_qemu_st8_i32))
68
69
+/* Only for 64-bit hosts at the moment. */
70
+DEF(qemu_ld_i128, 2, 1, 1,
71
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
72
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
73
+DEF(qemu_st_i128, 0, 3, 1,
74
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
75
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
76
+
77
/* Host vector support. */
78
79
#define IMPLVEC TCG_OPF_VECTOR | IMPL(TCG_TARGET_MAYBE_vec)
80
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
81
index XXXXXXX..XXXXXXX 100644
82
--- a/tcg/aarch64/tcg-target.h
83
+++ b/tcg/aarch64/tcg-target.h
84
@@ -XXX,XX +XXX,XX @@ extern bool have_lse2;
85
#define TCG_TARGET_HAS_muluh_i64 1
86
#define TCG_TARGET_HAS_mulsh_i64 1
87
88
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
89
+
90
#define TCG_TARGET_HAS_v64 1
91
#define TCG_TARGET_HAS_v128 1
92
#define TCG_TARGET_HAS_v256 0
93
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
94
index XXXXXXX..XXXXXXX 100644
95
--- a/tcg/arm/tcg-target.h
96
+++ b/tcg/arm/tcg-target.h
97
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
98
#define TCG_TARGET_HAS_rem_i32 0
99
#define TCG_TARGET_HAS_qemu_st8_i32 0
100
101
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
102
+
103
#define TCG_TARGET_HAS_v64 use_neon_instructions
104
#define TCG_TARGET_HAS_v128 use_neon_instructions
105
#define TCG_TARGET_HAS_v256 0
106
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
107
index XXXXXXX..XXXXXXX 100644
108
--- a/tcg/i386/tcg-target.h
109
+++ b/tcg/i386/tcg-target.h
110
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
111
#define TCG_TARGET_HAS_qemu_st8_i32 1
112
#endif
113
114
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
115
+
116
/* We do not support older SSE systems, only beginning with AVX1. */
117
#define TCG_TARGET_HAS_v64 have_avx1
118
#define TCG_TARGET_HAS_v128 have_avx1
119
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
120
index XXXXXXX..XXXXXXX 100644
121
--- a/tcg/loongarch64/tcg-target.h
122
+++ b/tcg/loongarch64/tcg-target.h
123
@@ -XXX,XX +XXX,XX @@ typedef enum {
124
#define TCG_TARGET_HAS_muls2_i64 0
125
#define TCG_TARGET_HAS_muluh_i64 1
126
#define TCG_TARGET_HAS_mulsh_i64 1
127
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
128
129
#define TCG_TARGET_DEFAULT_MO (0)
130
131
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
132
index XXXXXXX..XXXXXXX 100644
133
--- a/tcg/mips/tcg-target.h
134
+++ b/tcg/mips/tcg-target.h
135
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
136
#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
137
#endif
138
139
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
140
+
141
#define TCG_TARGET_DEFAULT_MO 0
142
#define TCG_TARGET_NEED_LDST_LABELS
143
144
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
145
index XXXXXXX..XXXXXXX 100644
146
--- a/tcg/ppc/tcg-target.h
147
+++ b/tcg/ppc/tcg-target.h
148
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
149
#define TCG_TARGET_HAS_mulsh_i64 1
150
#endif
151
152
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
153
+
154
/*
155
* While technically Altivec could support V64, it has no 64-bit store
156
* instruction and substituting two 32-bit stores makes the generated
157
diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
158
index XXXXXXX..XXXXXXX 100644
159
--- a/tcg/riscv/tcg-target.h
160
+++ b/tcg/riscv/tcg-target.h
161
@@ -XXX,XX +XXX,XX @@ typedef enum {
162
#define TCG_TARGET_HAS_muluh_i64 1
163
#define TCG_TARGET_HAS_mulsh_i64 1
164
165
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
166
+
167
#define TCG_TARGET_DEFAULT_MO (0)
168
169
#define TCG_TARGET_NEED_LDST_LABELS
170
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
171
index XXXXXXX..XXXXXXX 100644
172
--- a/tcg/s390x/tcg-target.h
173
+++ b/tcg/s390x/tcg-target.h
174
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
175
#define TCG_TARGET_HAS_muluh_i64 0
176
#define TCG_TARGET_HAS_mulsh_i64 0
177
178
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
179
+
180
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
181
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
182
#define TCG_TARGET_HAS_v256 0
183
diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h
184
index XXXXXXX..XXXXXXX 100644
185
--- a/tcg/sparc64/tcg-target.h
186
+++ b/tcg/sparc64/tcg-target.h
187
@@ -XXX,XX +XXX,XX @@ extern bool use_vis3_instructions;
188
#define TCG_TARGET_HAS_muluh_i64 use_vis3_instructions
189
#define TCG_TARGET_HAS_mulsh_i64 0
190
191
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
192
+
193
#define TCG_AREG0 TCG_REG_I0
194
195
#define TCG_TARGET_DEFAULT_MO (0)
196
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
197
index XXXXXXX..XXXXXXX 100644
198
--- a/tcg/tci/tcg-target.h
199
+++ b/tcg/tci/tcg-target.h
200
@@ -XXX,XX +XXX,XX @@
201
#define TCG_TARGET_HAS_mulu2_i32 1
202
#endif /* TCG_TARGET_REG_BITS == 64 */
203
204
+#define TCG_TARGET_HAS_qemu_ldst_i128 0
205
+
206
/* Number of registers available. */
207
#define TCG_TARGET_NB_REGS 16
208
209
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
210
index XXXXXXX..XXXXXXX 100644
211
--- a/tcg/tcg-op.c
212
+++ b/tcg/tcg-op.c
213
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
214
215
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
17
{
216
{
18
if (fold_const2(ctx, op) ||
217
- MemOpIdx oi = make_memop_idx(memop, idx);
19
- fold_xi_to_i(ctx, op, 0)) {
218
+ const MemOpIdx oi = make_memop_idx(memop, idx);
20
+ fold_xi_to_i(ctx, op, 0) ||
219
21
+ fold_xi_to_x(ctx, op, 1)) {
220
tcg_debug_assert((memop & MO_SIZE) == MO_128);
22
return true;
221
tcg_debug_assert((memop & MO_SIGN) == 0);
23
}
222
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
24
return false;
223
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
224
addr = plugin_prep_mem_callbacks(addr);
225
226
- /* TODO: allow the tcg backend to see the whole operation. */
227
+ /* TODO: For now, force 32-bit hosts to use the helper. */
228
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
229
+ TCGv_i64 lo, hi;
230
+ TCGArg addr_arg;
231
+ MemOpIdx adj_oi;
232
+ bool need_bswap = false;
233
234
- if (use_two_i64_for_i128(memop)) {
235
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
236
+ lo = TCGV128_HIGH(val);
237
+ hi = TCGV128_LOW(val);
238
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
239
+ need_bswap = true;
240
+ } else {
241
+ lo = TCGV128_LOW(val);
242
+ hi = TCGV128_HIGH(val);
243
+ adj_oi = oi;
244
+ }
245
+
246
+#if TARGET_LONG_BITS == 32
247
+ addr_arg = tcgv_i32_arg(addr);
248
+#else
249
+ addr_arg = tcgv_i64_arg(addr);
250
+#endif
251
+ tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
252
+
253
+ if (need_bswap) {
254
+ tcg_gen_bswap64_i64(lo, lo);
255
+ tcg_gen_bswap64_i64(hi, hi);
256
+ }
257
+ } else if (use_two_i64_for_i128(memop)) {
258
MemOp mop[2];
259
TCGv addr_p8;
260
TCGv_i64 x, y;
261
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
262
263
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
264
{
265
- MemOpIdx oi = make_memop_idx(memop, idx);
266
+ const MemOpIdx oi = make_memop_idx(memop, idx);
267
268
tcg_debug_assert((memop & MO_SIZE) == MO_128);
269
tcg_debug_assert((memop & MO_SIGN) == 0);
270
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
271
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
272
addr = plugin_prep_mem_callbacks(addr);
273
274
- /* TODO: allow the tcg backend to see the whole operation. */
275
+ /* TODO: For now, force 32-bit hosts to use the helper. */
276
277
- if (use_two_i64_for_i128(memop)) {
278
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
279
+ TCGv_i64 lo, hi;
280
+ TCGArg addr_arg;
281
+ MemOpIdx adj_oi;
282
+ bool need_bswap = false;
283
+
284
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
285
+ lo = tcg_temp_new_i64();
286
+ hi = tcg_temp_new_i64();
287
+ tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
288
+ tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
289
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
290
+ need_bswap = true;
291
+ } else {
292
+ lo = TCGV128_LOW(val);
293
+ hi = TCGV128_HIGH(val);
294
+ adj_oi = oi;
295
+ }
296
+
297
+#if TARGET_LONG_BITS == 32
298
+ addr_arg = tcgv_i32_arg(addr);
299
+#else
300
+ addr_arg = tcgv_i64_arg(addr);
301
+#endif
302
+ tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
303
+
304
+ if (need_bswap) {
305
+ tcg_temp_free_i64(lo);
306
+ tcg_temp_free_i64(hi);
307
+ }
308
+ } else if (use_two_i64_for_i128(memop)) {
309
MemOp mop[2];
310
TCGv addr_p8;
311
TCGv_i64 x, y;
312
diff --git a/tcg/tcg.c b/tcg/tcg.c
313
index XXXXXXX..XXXXXXX 100644
314
--- a/tcg/tcg.c
315
+++ b/tcg/tcg.c
316
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
317
case INDEX_op_qemu_st8_i32:
318
return TCG_TARGET_HAS_qemu_st8_i32;
319
320
+ case INDEX_op_qemu_ld_i128:
321
+ case INDEX_op_qemu_st_i128:
322
+ return TCG_TARGET_HAS_qemu_ldst_i128;
323
+
324
case INDEX_op_mov_i32:
325
case INDEX_op_setcond_i32:
326
case INDEX_op_brcond_i32:
327
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
328
case INDEX_op_qemu_st8_i32:
329
case INDEX_op_qemu_ld_i64:
330
case INDEX_op_qemu_st_i64:
331
+ case INDEX_op_qemu_ld_i128:
332
+ case INDEX_op_qemu_st_i128:
333
{
334
const char *s_al, *s_op, *s_at;
335
MemOpIdx oi = op->args[k++];
25
--
336
--
26
2.25.1
337
2.34.1
27
338
28
339
1
Most of these are handled by creating a fold_const2_commutative
1
With x86_64 as host, we do not have any temporaries with which to
2
to handle all of the binary operators. The rest were already
2
resolve cycles, but we do have xchg. As a side bonus, the set of
3
handled on a case-by-case basis in the switch, and have their
3
graphs that can be made with 3 nodes and all nodes conflicting is
4
own fold function in which to place the call.
4
small: two. We can solve the cycle with a single temp.
5
5
6
We now have only one major switch on TCGOpcode.
6
This is required for x86_64 to handle stores of i128: 1 address
7
register and 2 data registers.
7
8
8
Introduce NO_DEST and a block comment for swap_commutative in
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
order to make the handling of brcond and movcond opcodes cleaner.
10
11
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
11
---
14
tcg/optimize.c | 142 ++++++++++++++++++++++++-------------------------
12
tcg/tcg.c | 138 ++++++++++++++++++++++++++++++++++++++++++------------
15
1 file changed, 70 insertions(+), 72 deletions(-)
13
1 file changed, 108 insertions(+), 30 deletions(-)
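
For the cycle handling described in the second message above, a plain-C
picture of the only two 3-node cycles and how one saved value resolves either
(sketch only; register allocation and extensions omitted,
<stdint.h>/<stdbool.h> assumed):

    /* Sketch only: three mutually conflicting moves form at most two cycles. */
    static void rotate3(uint64_t *a, uint64_t *b, uint64_t *c, bool clockwise)
    {
        uint64_t scratch = *a;
        if (clockwise) {            /* a <- c, c <- b, b <- a */
            *a = *c; *c = *b; *b = scratch;
        } else {                    /* a <- b, b <- c, c <- a */
            *a = *b; *b = *c; *c = scratch;
        }
    }

When the host has xchg, as x86_64 does, the same cycles can instead be broken
with two exchanges and no scratch register.
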
16
14
17
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
diff --git a/tcg/tcg.c b/tcg/tcg.c
18
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
19
--- a/tcg/optimize.c
17
--- a/tcg/tcg.c
20
+++ b/tcg/optimize.c
18
+++ b/tcg/tcg.c
21
@@ -XXX,XX +XXX,XX @@ static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
19
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
22
return -1;
20
tcg_out_movext1_new_src(s, i1, src1);
23
}
21
}
24
22
25
+/**
23
+/**
26
+ * swap_commutative:
24
+ * tcg_out_movext3 -- move and extend three pair
27
+ * @dest: TCGArg of the destination argument, or NO_DEST.
25
+ * @s: tcg context
28
+ * @p1: first paired argument
26
+ * @i1: first move description
29
+ * @p2: second paired argument
27
+ * @i2: second move description
28
+ * @i3: third move description
29
+ * @scratch: temporary register, or -1 for none
30
+ *
30
+ *
31
+ * If *@p1 is a constant and *@p2 is not, swap.
31
+ * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
32
+ * If *@p2 matches @dest, swap.
32
+ * between the sources and destinations.
33
+ * Return true if a swap was performed.
34
+ */
33
+ */
35
+
34
+
36
+#define NO_DEST temp_arg(NULL)
35
+static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
36
+ const TCGMovExtend *i2, const TCGMovExtend *i3,
37
+ int scratch)
38
+{
39
+ TCGReg src1 = i1->src;
40
+ TCGReg src2 = i2->src;
41
+ TCGReg src3 = i3->src;
37
+
42
+
38
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
43
+ if (i1->dst != src2 && i1->dst != src3) {
39
{
44
+ tcg_out_movext1(s, i1);
40
TCGArg a1 = *p1, a2 = *p2;
45
+ tcg_out_movext2(s, i2, i3, scratch);
41
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
46
+ return;
42
return false;
47
+ }
43
}
48
+ if (i2->dst != src1 && i2->dst != src3) {
44
49
+ tcg_out_movext1(s, i2);
45
+static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
50
+ tcg_out_movext2(s, i1, i3, scratch);
46
+{
51
+ return;
47
+ swap_commutative(op->args[0], &op->args[1], &op->args[2]);
52
+ }
48
+ return fold_const2(ctx, op);
53
+ if (i3->dst != src1 && i3->dst != src2) {
54
+ tcg_out_movext1(s, i3);
55
+ tcg_out_movext2(s, i1, i2, scratch);
56
+ return;
57
+ }
58
+
59
+ /*
60
+ * There is a cycle. Since there are only 3 nodes, the cycle is
61
+ * either "clockwise" or "anti-clockwise", and can be solved with
62
+ * a single scratch or two xchg.
63
+ */
64
+ if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
65
+ /* "Clockwise" */
66
+ if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
67
+ tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
68
+ /* The data is now in the correct registers, now extend. */
69
+ tcg_out_movext1_new_src(s, i1, i1->dst);
70
+ tcg_out_movext1_new_src(s, i2, i2->dst);
71
+ tcg_out_movext1_new_src(s, i3, i3->dst);
72
+ } else {
73
+ tcg_debug_assert(scratch >= 0);
74
+ tcg_out_mov(s, i1->src_type, scratch, src1);
75
+ tcg_out_movext1(s, i3);
76
+ tcg_out_movext1(s, i2);
77
+ tcg_out_movext1_new_src(s, i1, scratch);
78
+ }
79
+ } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
80
+ /* "Anti-clockwise" */
81
+ if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
82
+ tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
83
+ /* The data is now in the correct registers, now extend. */
84
+ tcg_out_movext1_new_src(s, i1, i1->dst);
85
+ tcg_out_movext1_new_src(s, i2, i2->dst);
86
+ tcg_out_movext1_new_src(s, i3, i3->dst);
87
+ } else {
88
+ tcg_debug_assert(scratch >= 0);
89
+ tcg_out_mov(s, i1->src_type, scratch, src1);
90
+ tcg_out_movext1(s, i2);
91
+ tcg_out_movext1(s, i3);
92
+ tcg_out_movext1_new_src(s, i1, scratch);
93
+ }
94
+ } else {
95
+ g_assert_not_reached();
96
+ }
49
+}
97
+}
50
+
98
+
51
static bool fold_masks(OptContext *ctx, TCGOp *op)
99
#define C_PFX1(P, A) P##A
100
#define C_PFX2(P, A, B) P##A##_##B
101
#define C_PFX3(P, A, B, C) P##A##_##B##_##C
102
@@ -XXX,XX +XXX,XX @@ static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
103
104
static void tcg_out_helper_load_regs(TCGContext *s,
105
unsigned nmov, TCGMovExtend *mov,
106
- unsigned ntmp, const int *tmp)
107
+ const TCGLdstHelperParam *parm)
52
{
108
{
53
uint64_t a_mask = ctx->a_mask;
109
+ TCGReg dst3;
54
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
110
+
55
111
switch (nmov) {
56
static bool fold_add(OptContext *ctx, TCGOp *op)
112
- default:
57
{
113
+ case 4:
58
- if (fold_const2(ctx, op) ||
114
/* The backend must have provided enough temps for the worst case. */
59
+ if (fold_const2_commutative(ctx, op) ||
115
- tcg_debug_assert(ntmp + 1 >= nmov);
60
fold_xi_to_x(ctx, op, 0)) {
116
+ tcg_debug_assert(parm->ntmp >= 2);
61
return true;
117
118
- for (unsigned i = nmov - 1; i >= 2; --i) {
119
- TCGReg dst = mov[i].dst;
120
+ dst3 = mov[3].dst;
121
+ for (unsigned j = 0; j < 3; ++j) {
122
+ if (dst3 == mov[j].src) {
123
+ /*
124
+ * Conflict. Copy the source to a temporary, perform the
125
+ * remaining moves, then the extension from our scratch
126
+ * on the way out.
127
+ */
128
+ TCGReg scratch = parm->tmp[1];
129
130
- for (unsigned j = 0; j < i; ++j) {
131
- if (dst == mov[j].src) {
132
- /*
133
- * Conflict.
134
- * Copy the source to a temporary, recurse for the
135
- * remaining moves, perform the extension from our
136
- * scratch on the way out.
137
- */
138
- TCGReg scratch = tmp[--ntmp];
139
- tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
140
- mov[i].src = scratch;
141
-
142
- tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
143
- tcg_out_movext1(s, &mov[i]);
144
- return;
145
- }
146
+ tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
147
+ tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
148
+ tcg_out_movext1_new_src(s, &mov[3], scratch);
149
+ break;
150
}
151
-
152
- /* No conflicts: perform this move and continue. */
153
- tcg_out_movext1(s, &mov[i]);
154
}
155
- /* fall through for the final two moves */
156
157
+ /* No conflicts: perform this move and continue. */
158
+ tcg_out_movext1(s, &mov[3]);
159
+ /* fall through */
160
+
161
+ case 3:
162
+ tcg_out_movext3(s, mov, mov + 1, mov + 2,
163
+ parm->ntmp ? parm->tmp[0] : -1);
164
+ break;
165
case 2:
166
- tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
167
- return;
168
+ tcg_out_movext2(s, mov, mov + 1,
169
+ parm->ntmp ? parm->tmp[0] : -1);
170
+ break;
171
case 1:
172
tcg_out_movext1(s, mov);
173
- return;
174
- case 0:
175
+ break;
176
+ default:
177
g_assert_not_reached();
62
}
178
}
63
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
64
65
static bool fold_add2(OptContext *ctx, TCGOp *op)
66
{
67
+ /* Note that the high and low parts may be independently swapped. */
68
+ swap_commutative(op->args[0], &op->args[2], &op->args[4]);
69
+ swap_commutative(op->args[1], &op->args[3], &op->args[5]);
70
+
71
return fold_addsub2(ctx, op, true);
72
}
179
}
73
180
@@ -XXX,XX +XXX,XX @@ static void tcg_out_helper_load_slots(TCGContext *s,
74
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
181
for (i = 0; i < nmov; ++i) {
75
{
182
mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
76
uint64_t z1, z2;
77
78
- if (fold_const2(ctx, op) ||
79
+ if (fold_const2_commutative(ctx, op) ||
80
fold_xi_to_i(ctx, op, 0) ||
81
fold_xi_to_x(ctx, op, -1) ||
82
fold_xx_to_x(ctx, op)) {
83
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
84
static bool fold_brcond(OptContext *ctx, TCGOp *op)
85
{
86
TCGCond cond = op->args[2];
87
- int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
88
+ int i;
89
90
+ if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
91
+ op->args[2] = cond = tcg_swap_cond(cond);
92
+ }
93
+
94
+ i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
95
if (i == 0) {
96
tcg_op_remove(ctx->tcg, op);
97
return true;
98
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
99
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
100
{
101
TCGCond cond = op->args[4];
102
- int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
103
TCGArg label = op->args[5];
104
- int inv = 0;
105
+ int i, inv = 0;
106
107
+ if (swap_commutative2(&op->args[0], &op->args[2])) {
108
+ op->args[4] = cond = tcg_swap_cond(cond);
109
+ }
110
+
111
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
112
if (i >= 0) {
113
goto do_brcond_const;
114
}
183
}
115
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
184
- tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
116
185
+ tcg_out_helper_load_regs(s, nmov, mov, parm);
117
static bool fold_eqv(OptContext *ctx, TCGOp *op)
186
}
118
{
187
119
- if (fold_const2(ctx, op) ||
188
static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
120
+ if (fold_const2_commutative(ctx, op) ||
121
fold_xi_to_x(ctx, op, -1) ||
122
fold_xi_to_not(ctx, op, 0)) {
123
return true;
124
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
125
static bool fold_movcond(OptContext *ctx, TCGOp *op)
126
{
127
TCGCond cond = op->args[5];
128
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
129
+ int i;
130
131
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
132
+ op->args[5] = cond = tcg_swap_cond(cond);
133
+ }
134
+ /*
135
+ * Canonicalize the "false" input reg to match the destination reg so
136
+ * that the tcg backend can implement a "move if true" operation.
137
+ */
138
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
139
+ op->args[5] = cond = tcg_invert_cond(cond);
140
+ }
141
+
142
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
143
if (i >= 0) {
144
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
145
}
146
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
147
148
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
149
{
150
- if (fold_const2(ctx, op) ||
151
+ if (fold_const2_commutative(ctx, op) ||
152
fold_xi_to_i(ctx, op, 0)) {
153
return true;
154
}
155
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
156
157
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
158
{
159
+ swap_commutative(op->args[0], &op->args[2], &op->args[3]);
160
+
161
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
162
uint64_t a = arg_info(op->args[2])->val;
163
uint64_t b = arg_info(op->args[3])->val;
164
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
165
166
static bool fold_nand(OptContext *ctx, TCGOp *op)
167
{
168
- if (fold_const2(ctx, op) ||
169
+ if (fold_const2_commutative(ctx, op) ||
170
fold_xi_to_not(ctx, op, -1)) {
171
return true;
172
}
173
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
174
175
static bool fold_nor(OptContext *ctx, TCGOp *op)
176
{
177
- if (fold_const2(ctx, op) ||
178
+ if (fold_const2_commutative(ctx, op) ||
179
fold_xi_to_not(ctx, op, 0)) {
180
return true;
181
}
182
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
183
184
static bool fold_or(OptContext *ctx, TCGOp *op)
185
{
186
- if (fold_const2(ctx, op) ||
187
+ if (fold_const2_commutative(ctx, op) ||
188
fold_xi_to_x(ctx, op, 0) ||
189
fold_xx_to_x(ctx, op)) {
190
return true;
191
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
192
static bool fold_setcond(OptContext *ctx, TCGOp *op)
193
{
194
TCGCond cond = op->args[3];
195
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
196
+ int i;
197
198
+ if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
199
+ op->args[3] = cond = tcg_swap_cond(cond);
200
+ }
201
+
202
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
203
if (i >= 0) {
204
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
205
}
206
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
207
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
208
{
209
TCGCond cond = op->args[5];
210
- int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
211
- int inv = 0;
212
+ int i, inv = 0;
213
214
+ if (swap_commutative2(&op->args[1], &op->args[3])) {
215
+ op->args[5] = cond = tcg_swap_cond(cond);
216
+ }
217
+
218
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
219
if (i >= 0) {
220
goto do_setcond_const;
221
}
222
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
223
224
static bool fold_xor(OptContext *ctx, TCGOp *op)
225
{
226
- if (fold_const2(ctx, op) ||
227
+ if (fold_const2_commutative(ctx, op) ||
228
fold_xx_to_i(ctx, op, 0) ||
229
fold_xi_to_x(ctx, op, 0) ||
230
fold_xi_to_not(ctx, op, -1)) {
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
ctx.type = TCG_TYPE_I32;
233
}
234
235
- /* For commutative operations make constant second argument */
236
- switch (opc) {
237
- CASE_OP_32_64_VEC(add):
238
- CASE_OP_32_64_VEC(mul):
239
- CASE_OP_32_64_VEC(and):
240
- CASE_OP_32_64_VEC(or):
241
- CASE_OP_32_64_VEC(xor):
242
- CASE_OP_32_64(eqv):
243
- CASE_OP_32_64(nand):
244
- CASE_OP_32_64(nor):
245
- CASE_OP_32_64(muluh):
246
- CASE_OP_32_64(mulsh):
247
- swap_commutative(op->args[0], &op->args[1], &op->args[2]);
248
- break;
249
- CASE_OP_32_64(brcond):
250
- if (swap_commutative(-1, &op->args[0], &op->args[1])) {
251
- op->args[2] = tcg_swap_cond(op->args[2]);
252
- }
253
- break;
254
- CASE_OP_32_64(setcond):
255
- if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
256
- op->args[3] = tcg_swap_cond(op->args[3]);
257
- }
258
- break;
259
- CASE_OP_32_64(movcond):
260
- if (swap_commutative(-1, &op->args[1], &op->args[2])) {
261
- op->args[5] = tcg_swap_cond(op->args[5]);
262
- }
263
- /* For movcond, we canonicalize the "false" input reg to match
264
- the destination reg so that the tcg backend can implement
265
- a "move if true" operation. */
266
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
267
- op->args[5] = tcg_invert_cond(op->args[5]);
268
- }
269
- break;
270
- CASE_OP_32_64(add2):
271
- swap_commutative(op->args[0], &op->args[2], &op->args[4]);
272
- swap_commutative(op->args[1], &op->args[3], &op->args[5]);
273
- break;
274
- CASE_OP_32_64(mulu2):
275
- CASE_OP_32_64(muls2):
276
- swap_commutative(op->args[0], &op->args[2], &op->args[3]);
277
- break;
278
- case INDEX_op_brcond2_i32:
279
- if (swap_commutative2(&op->args[0], &op->args[2])) {
280
- op->args[4] = tcg_swap_cond(op->args[4]);
281
- }
282
- break;
283
- case INDEX_op_setcond2_i32:
284
- if (swap_commutative2(&op->args[1], &op->args[3])) {
285
- op->args[5] = tcg_swap_cond(op->args[5]);
286
- }
287
- break;
288
- default:
289
- break;
290
- }
291
-
292
/* Assume all bits affected, and no bits known zero. */
293
ctx.a_mask = -1;
294
ctx.z_mask = -1;
295
--
189
--
296
2.25.1
190
2.34.1
297
298
1
Pull the "op r, a, 0 => movi r, 0" optimization into a function,
1
Now that tcg_out_helper_load_regs is not recursive, we can
2
and use it in the outer opcode fold functions.
2
merge it into its only caller, tcg_out_helper_load_slots.
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 38 ++++++++++++++++++++------------------
7
tcg/tcg.c | 89 +++++++++++++++++++++++++------------------------------
9
1 file changed, 20 insertions(+), 18 deletions(-)
8
1 file changed, 41 insertions(+), 48 deletions(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/tcg/tcg.c
14
+++ b/tcg/optimize.c
13
+++ b/tcg/tcg.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
16
return false;
15
return ofs;
17
}
16
}
18
17
19
+/* If the binary operation has second argument @i, fold to @i. */
18
-static void tcg_out_helper_load_regs(TCGContext *s,
20
+static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
19
- unsigned nmov, TCGMovExtend *mov,
21
+{
20
- const TCGLdstHelperParam *parm)
22
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
21
+static void tcg_out_helper_load_slots(TCGContext *s,
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
22
+ unsigned nmov, TCGMovExtend *mov,
23
+ const TCGLdstHelperParam *parm)
24
{
25
+ unsigned i;
26
TCGReg dst3;
27
28
+ /*
29
+ * Start from the end, storing to the stack first.
30
+ * This frees those registers, so we need not consider overlap.
31
+ */
32
+ for (i = nmov; i-- > 0; ) {
33
+ unsigned slot = mov[i].dst;
34
+
35
+ if (arg_slot_reg_p(slot)) {
36
+ goto found_reg;
37
+ }
38
+
39
+ TCGReg src = mov[i].src;
40
+ TCGType dst_type = mov[i].dst_type;
41
+ MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
42
+
43
+ /* The argument is going onto the stack; extend into scratch. */
44
+ if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
45
+ tcg_debug_assert(parm->ntmp != 0);
46
+ mov[i].dst = src = parm->tmp[0];
47
+ tcg_out_movext1(s, &mov[i]);
48
+ }
49
+
50
+ tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
51
+ tcg_out_helper_stk_ofs(dst_type, slot));
24
+ }
52
+ }
25
+ return false;
53
+ return;
26
+}
27
+
54
+
28
/* If the binary operation has both arguments equal, fold to @i. */
55
+ found_reg:
29
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
56
+ /*
30
{
57
+ * The remaining arguments are in registers.
31
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
58
+ * Convert slot numbers to argument registers.
32
static bool fold_and(OptContext *ctx, TCGOp *op)
59
+ */
33
{
60
+ nmov = i + 1;
34
if (fold_const2(ctx, op) ||
61
+ for (i = 0; i < nmov; ++i) {
35
+ fold_xi_to_i(ctx, op, 0) ||
62
+ mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
36
fold_xx_to_x(ctx, op)) {
63
+ }
37
return true;
64
+
65
switch (nmov) {
66
case 4:
67
/* The backend must have provided enough temps for the worst case. */
68
@@ -XXX,XX +XXX,XX @@ static void tcg_out_helper_load_regs(TCGContext *s,
38
}
69
}
39
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
40
41
static bool fold_mul(OptContext *ctx, TCGOp *op)
42
{
43
- return fold_const2(ctx, op);
44
+ if (fold_const2(ctx, op) ||
45
+ fold_xi_to_i(ctx, op, 0)) {
46
+ return true;
47
+ }
48
+ return false;
49
}
70
}
50
71
51
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
72
-static void tcg_out_helper_load_slots(TCGContext *s,
52
{
73
- unsigned nmov, TCGMovExtend *mov,
53
- return fold_const2(ctx, op);
74
- const TCGLdstHelperParam *parm)
54
+ if (fold_const2(ctx, op) ||
75
-{
55
+ fold_xi_to_i(ctx, op, 0)) {
76
- unsigned i;
56
+ return true;
77
-
57
+ }
78
- /*
58
+ return false;
79
- * Start from the end, storing to the stack first.
59
}
80
- * This frees those registers, so we need not consider overlap.
60
81
- */
61
static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
82
- for (i = nmov; i-- > 0; ) {
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
83
- unsigned slot = mov[i].dst;
63
continue;
84
-
64
}
85
- if (arg_slot_reg_p(slot)) {
65
86
- goto found_reg;
66
- /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
67
- switch (opc) {
68
- CASE_OP_32_64_VEC(and):
69
- CASE_OP_32_64_VEC(mul):
70
- CASE_OP_32_64(muluh):
71
- CASE_OP_32_64(mulsh):
72
- if (arg_is_const(op->args[2])
73
- && arg_info(op->args[2])->val == 0) {
74
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
75
- continue;
76
- }
77
- break;
78
- default:
79
- break;
80
- }
87
- }
81
-
88
-
82
/*
89
- TCGReg src = mov[i].src;
83
* Process each opcode.
90
- TCGType dst_type = mov[i].dst_type;
84
* Sorted alphabetically by opcode as much as possible.
91
- MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
92
-
93
- /* The argument is going onto the stack; extend into scratch. */
94
- if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
95
- tcg_debug_assert(parm->ntmp != 0);
96
- mov[i].dst = src = parm->tmp[0];
97
- tcg_out_movext1(s, &mov[i]);
98
- }
99
-
100
- tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
101
- tcg_out_helper_stk_ofs(dst_type, slot));
102
- }
103
- return;
104
-
105
- found_reg:
106
- /*
107
- * The remaining arguments are in registers.
108
- * Convert slot numbers to argument registers.
109
- */
110
- nmov = i + 1;
111
- for (i = 0; i < nmov; ++i) {
112
- mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
113
- }
114
- tcg_out_helper_load_regs(s, nmov, mov, parm);
115
-}
116
-
117
static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
118
TCGType type, tcg_target_long imm,
119
const TCGLdstHelperParam *parm)
85
--
120
--
86
2.25.1
121
2.34.1
87
88
1
Compute the type of the operation early.
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
3
There are at least 4 places that used a def->flags ladder
4
to determine the type of the operation being optimized.
5
6
There were two places that assumed !TCG_OPF_64BIT means
7
TCG_TYPE_I32, and so could potentially compute incorrect
8
results for vector operations.
9
10
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
3
---
13
tcg/optimize.c | 149 +++++++++++++++++++++++++++++--------------------
4
tcg/tcg.c | 196 +++++++++++++++++++++++++++++++++++++++++++++---------
14
1 file changed, 89 insertions(+), 60 deletions(-)
5
1 file changed, 163 insertions(+), 33 deletions(-)
15
6
16
diff --git a/tcg/optimize.c b/tcg/optimize.c
7
diff --git a/tcg/tcg.c b/tcg/tcg.c
17
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/optimize.c
9
--- a/tcg/tcg.c
19
+++ b/tcg/optimize.c
10
+++ b/tcg/tcg.c
20
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
11
@@ -XXX,XX +XXX,XX @@ static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
21
12
[MO_UQ] = helper_ldq_mmu,
22
/* In flight values from optimization. */
13
#if TCG_TARGET_REG_BITS == 64
23
uint64_t z_mask;
14
[MO_SL] = helper_ldsl_mmu,
24
+ TCGType type;
15
+ [MO_128] = helper_ld16_mmu,
25
} OptContext;
16
#endif
26
17
};
27
static inline TempOptInfo *ts_info(TCGTemp *ts)
18
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
19
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
20
[MO_16] = helper_stw_mmu,
21
[MO_32] = helper_stl_mmu,
22
[MO_64] = helper_stq_mmu,
23
+#if TCG_TARGET_REG_BITS == 64
24
+ [MO_128] = helper_st16_mmu,
25
+#endif
26
};
27
28
TCGContext tcg_init_ctx;
29
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld64_mmu = {
30
| dh_typemask(ptr, 4) /* uintptr_t ra */
31
};
32
33
+static TCGHelperInfo info_helper_ld128_mmu = {
34
+ .flags = TCG_CALL_NO_WG,
35
+ .typemask = dh_typemask(i128, 0) /* return Int128 */
36
+ | dh_typemask(env, 1)
37
+ | dh_typemask(tl, 2) /* target_ulong addr */
38
+ | dh_typemask(i32, 3) /* unsigned oi */
39
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
40
+};
41
+
42
static TCGHelperInfo info_helper_st32_mmu = {
43
.flags = TCG_CALL_NO_WG,
44
.typemask = dh_typemask(void, 0)
45
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st64_mmu = {
46
| dh_typemask(ptr, 5) /* uintptr_t ra */
47
};
48
49
+static TCGHelperInfo info_helper_st128_mmu = {
50
+ .flags = TCG_CALL_NO_WG,
51
+ .typemask = dh_typemask(void, 0)
52
+ | dh_typemask(env, 1)
53
+ | dh_typemask(tl, 2) /* target_ulong addr */
54
+ | dh_typemask(i128, 3) /* Int128 data */
55
+ | dh_typemask(i32, 4) /* unsigned oi */
56
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
57
+};
58
+
59
#ifdef CONFIG_TCG_INTERPRETER
60
static ffi_type *typecode_to_ffi(int argmask)
29
{
61
{
30
TCGTemp *dst_ts = arg_temp(dst);
62
@@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus)
31
TCGTemp *src_ts = arg_temp(src);
63
32
- const TCGOpDef *def;
64
init_call_layout(&info_helper_ld32_mmu);
33
TempOptInfo *di;
65
init_call_layout(&info_helper_ld64_mmu);
34
TempOptInfo *si;
66
+ init_call_layout(&info_helper_ld128_mmu);
35
uint64_t z_mask;
67
init_call_layout(&info_helper_st32_mmu);
36
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
68
init_call_layout(&info_helper_st64_mmu);
37
reset_ts(dst_ts);
69
+ init_call_layout(&info_helper_st128_mmu);
38
di = ts_info(dst_ts);
70
39
si = ts_info(src_ts);
71
#ifdef CONFIG_TCG_INTERPRETER
40
- def = &tcg_op_defs[op->opc];
72
init_ffi_layouts();
41
- if (def->flags & TCG_OPF_VECTOR) {
73
@@ -XXX,XX +XXX,XX @@ static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
42
- new_op = INDEX_op_mov_vec;
74
TCGType dst_type, TCGType src_type,
43
- } else if (def->flags & TCG_OPF_64BIT) {
75
TCGReg lo, TCGReg hi)
44
- new_op = INDEX_op_mov_i64;
76
{
77
+ MemOp reg_mo;
78
+
79
if (dst_type <= TCG_TYPE_REG) {
80
MemOp src_ext;
81
82
@@ -XXX,XX +XXX,XX @@ static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
83
return 1;
84
}
85
86
- assert(TCG_TARGET_REG_BITS == 32);
87
+ if (TCG_TARGET_REG_BITS == 32) {
88
+ assert(dst_type == TCG_TYPE_I64);
89
+ reg_mo = MO_32;
90
+ } else {
91
+ assert(dst_type == TCG_TYPE_I128);
92
+ reg_mo = MO_64;
93
+ }
94
95
mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
96
mov[0].src = lo;
97
- mov[0].dst_type = TCG_TYPE_I32;
98
- mov[0].src_type = TCG_TYPE_I32;
99
- mov[0].src_ext = MO_32;
100
+ mov[0].dst_type = TCG_TYPE_REG;
101
+ mov[0].src_type = TCG_TYPE_REG;
102
+ mov[0].src_ext = reg_mo;
103
104
mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
105
mov[1].src = hi;
106
- mov[1].dst_type = TCG_TYPE_I32;
107
- mov[1].src_type = TCG_TYPE_I32;
108
- mov[1].src_ext = MO_32;
109
+ mov[1].dst_type = TCG_TYPE_REG;
110
+ mov[1].src_type = TCG_TYPE_REG;
111
+ mov[1].src_ext = reg_mo;
112
113
return 2;
114
}
115
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
116
case MO_64:
117
info = &info_helper_ld64_mmu;
118
break;
119
+ case MO_128:
120
+ info = &info_helper_ld128_mmu;
121
+ break;
122
default:
123
g_assert_not_reached();
124
}
125
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
126
127
tcg_out_helper_load_slots(s, nmov, mov, parm);
128
129
- /* No special attention for 32 and 64-bit return values. */
130
- tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
131
+ switch (info->out_kind) {
132
+ case TCG_CALL_RET_NORMAL:
133
+ case TCG_CALL_RET_BY_VEC:
134
+ break;
135
+ case TCG_CALL_RET_BY_REF:
136
+ /*
137
+ * The return reference is in the first argument slot.
138
+ * We need memory in which to return: re-use the top of stack.
139
+ */
140
+ {
141
+ int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
142
+
143
+ if (arg_slot_reg_p(0)) {
144
+ tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
145
+ TCG_REG_CALL_STACK, ofs_slot0);
146
+ } else {
147
+ tcg_debug_assert(parm->ntmp != 0);
148
+ tcg_out_addi_ptr(s, parm->tmp[0],
149
+ TCG_REG_CALL_STACK, ofs_slot0);
150
+ tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
151
+ TCG_REG_CALL_STACK, ofs_slot0);
152
+ }
153
+ }
154
+ break;
155
+ default:
156
+ g_assert_not_reached();
157
+ }
158
159
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
160
}
161
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
162
bool load_sign,
163
const TCGLdstHelperParam *parm)
164
{
165
+ MemOp mop = get_memop(ldst->oi);
166
TCGMovExtend mov[2];
167
+ int ofs_slot0;
168
169
- if (ldst->type <= TCG_TYPE_REG) {
170
- MemOp mop = get_memop(ldst->oi);
171
+ switch (ldst->type) {
172
+ case TCG_TYPE_I64:
173
+ if (TCG_TARGET_REG_BITS == 32) {
174
+ break;
175
+ }
176
+ /* fall through */
177
178
+ case TCG_TYPE_I32:
179
mov[0].dst = ldst->datalo_reg;
180
mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
181
mov[0].dst_type = ldst->type;
182
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
183
mov[0].src_ext = mop & MO_SSIZE;
184
}
185
tcg_out_movext1(s, mov);
45
- } else {
186
- } else {
46
+
187
- assert(TCG_TARGET_REG_BITS == 32);
47
+ switch (ctx->type) {
188
+ return;
48
+ case TCG_TYPE_I32:
189
49
new_op = INDEX_op_mov_i32;
190
- mov[0].dst = ldst->datalo_reg;
50
+ break;
191
- mov[0].src =
51
+ case TCG_TYPE_I64:
192
- tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
52
+ new_op = INDEX_op_mov_i64;
193
- mov[0].dst_type = TCG_TYPE_I32;
53
+ break;
194
- mov[0].src_type = TCG_TYPE_I32;
54
+ case TCG_TYPE_V64:
195
- mov[0].src_ext = MO_32;
55
+ case TCG_TYPE_V128:
196
+ case TCG_TYPE_I128:
56
+ case TCG_TYPE_V256:
197
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
57
+ /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
198
+ ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
58
+ new_op = INDEX_op_mov_vec;
199
+ switch (TCG_TARGET_CALL_RET_I128) {
59
+ break;
200
+ case TCG_CALL_RET_NORMAL:
201
+ break;
202
+ case TCG_CALL_RET_BY_VEC:
203
+ tcg_out_st(s, TCG_TYPE_V128,
204
+ tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
205
+ TCG_REG_CALL_STACK, ofs_slot0);
206
+ /* fall through */
207
+ case TCG_CALL_RET_BY_REF:
208
+ tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
209
+ TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
210
+ tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
211
+ TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
212
+ return;
213
+ default:
214
+ g_assert_not_reached();
215
+ }
216
+ break;
217
218
- mov[1].dst = ldst->datahi_reg;
219
- mov[1].src =
220
- tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
221
- mov[1].dst_type = TCG_TYPE_REG;
222
- mov[1].src_type = TCG_TYPE_REG;
223
- mov[1].src_ext = MO_32;
224
-
225
- tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
60
+ default:
226
+ default:
61
+ g_assert_not_reached();
227
+ g_assert_not_reached();
62
}
228
}
63
op->opc = new_op;
229
+
64
- /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
230
+ mov[0].dst = ldst->datalo_reg;
65
op->args[0] = dst;
231
+ mov[0].src =
66
op->args[1] = src;
232
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
67
233
+ mov[0].dst_type = TCG_TYPE_I32;
68
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
234
+ mov[0].src_type = TCG_TYPE_I32;
69
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
235
+ mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
70
TCGArg dst, uint64_t val)
236
+
71
{
237
+ mov[1].dst = ldst->datahi_reg;
72
- const TCGOpDef *def = &tcg_op_defs[op->opc];
238
+ mov[1].src =
73
- TCGType type;
239
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
74
- TCGTemp *tv;
240
+ mov[1].dst_type = TCG_TYPE_REG;
75
-
241
+ mov[1].src_type = TCG_TYPE_REG;
76
- if (def->flags & TCG_OPF_VECTOR) {
242
+ mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
77
- type = TCGOP_VECL(op) + TCG_TYPE_V64;
243
+
78
- } else if (def->flags & TCG_OPF_64BIT) {
244
+ tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
79
- type = TCG_TYPE_I64;
80
- } else {
81
- type = TCG_TYPE_I32;
82
- }
83
-
84
/* Convert movi to mov with constant temp. */
85
- tv = tcg_constant_internal(type, val);
86
+ TCGTemp *tv = tcg_constant_internal(ctx->type, val);
87
+
88
init_ts_info(ctx, tv);
89
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
90
}
245
}
91
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
246
247
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
248
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
249
info = &info_helper_st64_mmu;
250
data_type = TCG_TYPE_I64;
251
break;
252
+ case MO_128:
253
+ info = &info_helper_st128_mmu;
254
+ data_type = TCG_TYPE_I128;
255
+ break;
256
default:
257
g_assert_not_reached();
92
}
258
}
259
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
260
261
/* Handle data argument. */
262
loc = &info->in[next_arg];
263
- n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
264
- ldst->datalo_reg, ldst->datahi_reg);
265
- next_arg += n;
266
- nmov += n;
267
- tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
268
+ switch (loc->kind) {
269
+ case TCG_CALL_ARG_NORMAL:
270
+ case TCG_CALL_ARG_EXTEND_U:
271
+ case TCG_CALL_ARG_EXTEND_S:
272
+ n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
273
+ ldst->datalo_reg, ldst->datahi_reg);
274
+ next_arg += n;
275
+ nmov += n;
276
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
277
+ break;
278
+
279
+ case TCG_CALL_ARG_BY_REF:
280
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
281
+ tcg_debug_assert(data_type == TCG_TYPE_I128);
282
+ tcg_out_st(s, TCG_TYPE_I64,
283
+ HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
284
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
285
+ tcg_out_st(s, TCG_TYPE_I64,
286
+ HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
287
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
288
+
289
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
290
+
291
+ if (arg_slot_reg_p(loc->arg_slot)) {
292
+ tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
293
+ TCG_REG_CALL_STACK,
294
+ arg_slot_stk_ofs(loc->ref_slot));
295
+ } else {
296
+ tcg_debug_assert(parm->ntmp != 0);
297
+ tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
298
+ arg_slot_stk_ofs(loc->ref_slot));
299
+ tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
300
+ TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
301
+ }
302
+ next_arg += 2;
303
+ break;
304
+
305
+ default:
306
+ g_assert_not_reached();
307
+ }
308
309
- tcg_out_helper_load_slots(s, nmov, mov, parm);
310
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
93
}
311
}
94
312
95
-static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
96
+static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
97
+ uint64_t x, uint64_t y)
98
{
99
- const TCGOpDef *def = &tcg_op_defs[op];
100
uint64_t res = do_constant_folding_2(op, x, y);
101
- if (!(def->flags & TCG_OPF_64BIT)) {
102
+ if (type == TCG_TYPE_I32) {
103
res = (int32_t)res;
104
}
105
return res;
106
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
107
* Return -1 if the condition can't be simplified,
108
* and the result of the condition (0 or 1) if it can.
109
*/
110
-static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
111
+static int do_constant_folding_cond(TCGType type, TCGArg x,
112
TCGArg y, TCGCond c)
113
{
114
uint64_t xv = arg_info(x)->val;
115
uint64_t yv = arg_info(y)->val;
116
117
if (arg_is_const(x) && arg_is_const(y)) {
118
- const TCGOpDef *def = &tcg_op_defs[op];
119
- tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
120
- if (def->flags & TCG_OPF_64BIT) {
121
- return do_constant_folding_cond_64(xv, yv, c);
122
- } else {
123
+ switch (type) {
124
+ case TCG_TYPE_I32:
125
return do_constant_folding_cond_32(xv, yv, c);
126
+ case TCG_TYPE_I64:
127
+ return do_constant_folding_cond_64(xv, yv, c);
128
+ default:
129
+ /* Only scalar comparisons are optimizable */
130
+ return -1;
131
}
132
} else if (args_are_copies(x, y)) {
133
return do_constant_folding_cond_eq(c);
134
@@ -XXX,XX +XXX,XX @@ static bool fold_const1(OptContext *ctx, TCGOp *op)
135
uint64_t t;
136
137
t = arg_info(op->args[1])->val;
138
- t = do_constant_folding(op->opc, t, 0);
139
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
140
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
141
}
142
return false;
143
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
144
uint64_t t1 = arg_info(op->args[1])->val;
145
uint64_t t2 = arg_info(op->args[2])->val;
146
147
- t1 = do_constant_folding(op->opc, t1, t2);
148
+ t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
149
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
150
}
151
return false;
152
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
153
static bool fold_brcond(OptContext *ctx, TCGOp *op)
154
{
155
TCGCond cond = op->args[2];
156
- int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
157
+ int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
158
159
if (i == 0) {
160
tcg_op_remove(ctx->tcg, op);
161
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
162
* Simplify EQ/NE comparisons where one of the pairs
163
* can be simplified.
164
*/
165
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
166
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
167
op->args[2], cond);
168
switch (i ^ inv) {
169
case 0:
170
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
171
goto do_brcond_high;
172
}
173
174
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
175
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
176
op->args[3], cond);
177
switch (i ^ inv) {
178
case 0:
179
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
180
if (arg_is_const(op->args[1])) {
181
uint64_t t = arg_info(op->args[1])->val;
182
183
- t = do_constant_folding(op->opc, t, op->args[2]);
184
+ t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
185
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
186
}
187
return false;
188
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
189
uint64_t t = arg_info(op->args[1])->val;
190
191
if (t != 0) {
192
- t = do_constant_folding(op->opc, t, 0);
193
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
194
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
195
}
196
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
197
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
198
199
static bool fold_movcond(OptContext *ctx, TCGOp *op)
200
{
201
- TCGOpcode opc = op->opc;
202
TCGCond cond = op->args[5];
203
- int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
204
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
205
206
if (i >= 0) {
207
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
208
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
209
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
210
uint64_t tv = arg_info(op->args[3])->val;
211
uint64_t fv = arg_info(op->args[4])->val;
212
+ TCGOpcode opc;
213
214
- opc = (opc == INDEX_op_movcond_i32
215
- ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
216
+ switch (ctx->type) {
217
+ case TCG_TYPE_I32:
218
+ opc = INDEX_op_setcond_i32;
219
+ break;
220
+ case TCG_TYPE_I64:
221
+ opc = INDEX_op_setcond_i64;
222
+ break;
223
+ default:
224
+ g_assert_not_reached();
225
+ }
226
227
if (tv == 1 && fv == 0) {
228
op->opc = opc;
229
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
230
static bool fold_setcond(OptContext *ctx, TCGOp *op)
231
{
232
TCGCond cond = op->args[3];
233
- int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
234
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
235
236
if (i >= 0) {
237
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
238
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
239
* Simplify EQ/NE comparisons where one of the pairs
240
* can be simplified.
241
*/
242
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
243
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
244
op->args[3], cond);
245
switch (i ^ inv) {
246
case 0:
247
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
248
goto do_setcond_high;
249
}
250
251
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
252
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
253
op->args[4], cond);
254
switch (i ^ inv) {
255
case 0:
256
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
257
init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
258
copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
259
260
+ /* Pre-compute the type of the operation. */
261
+ if (def->flags & TCG_OPF_VECTOR) {
262
+ ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
263
+ } else if (def->flags & TCG_OPF_64BIT) {
264
+ ctx.type = TCG_TYPE_I64;
265
+ } else {
266
+ ctx.type = TCG_TYPE_I32;
267
+ }
268
+
269
/* For commutative operations make constant second argument */
270
switch (opc) {
271
CASE_OP_32_64_VEC(add):
272
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
273
/* Proceed with possible constant folding. */
274
break;
275
}
276
- if (opc == INDEX_op_sub_i32) {
277
+ switch (ctx.type) {
278
+ case TCG_TYPE_I32:
279
neg_op = INDEX_op_neg_i32;
280
have_neg = TCG_TARGET_HAS_neg_i32;
281
- } else if (opc == INDEX_op_sub_i64) {
282
+ break;
283
+ case TCG_TYPE_I64:
284
neg_op = INDEX_op_neg_i64;
285
have_neg = TCG_TARGET_HAS_neg_i64;
286
- } else if (TCG_TARGET_HAS_neg_vec) {
287
- TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
288
- unsigned vece = TCGOP_VECE(op);
289
- neg_op = INDEX_op_neg_vec;
290
- have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
291
- } else {
292
break;
293
+ case TCG_TYPE_V64:
294
+ case TCG_TYPE_V128:
295
+ case TCG_TYPE_V256:
296
+ neg_op = INDEX_op_neg_vec;
297
+ have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
298
+ TCGOP_VECE(op)) > 0;
299
+ break;
300
+ default:
301
+ g_assert_not_reached();
302
}
303
if (!have_neg) {
304
break;
305
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
306
TCGOpcode not_op;
307
bool have_not;
308
309
- if (def->flags & TCG_OPF_VECTOR) {
310
- not_op = INDEX_op_not_vec;
311
- have_not = TCG_TARGET_HAS_not_vec;
312
- } else if (def->flags & TCG_OPF_64BIT) {
313
- not_op = INDEX_op_not_i64;
314
- have_not = TCG_TARGET_HAS_not_i64;
315
- } else {
316
+ switch (ctx.type) {
317
+ case TCG_TYPE_I32:
318
not_op = INDEX_op_not_i32;
319
have_not = TCG_TARGET_HAS_not_i32;
320
+ break;
321
+ case TCG_TYPE_I64:
322
+ not_op = INDEX_op_not_i64;
323
+ have_not = TCG_TARGET_HAS_not_i64;
324
+ break;
325
+ case TCG_TYPE_V64:
326
+ case TCG_TYPE_V128:
327
+ case TCG_TYPE_V256:
328
+ not_op = INDEX_op_not_vec;
329
+ have_not = TCG_TARGET_HAS_not_vec;
330
+ break;
331
+ default:
332
+ g_assert_not_reached();
333
}
334
if (!have_not) {
335
break;
336
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
337
below, we can ignore high bits, but for further optimizations we
338
need to record that the high bits contain garbage. */
339
partmask = z_mask;
340
- if (!(def->flags & TCG_OPF_64BIT)) {
341
+ if (ctx.type == TCG_TYPE_I32) {
342
z_mask |= ~(tcg_target_ulong)0xffffffffu;
343
partmask &= 0xffffffffu;
344
affected &= 0xffffffffu;
345
--
313
--
346
2.25.1
314
2.34.1
347
348
1
Reduce some code duplication by folding the NE and EQ cases.
1
Examine MemOp for atomicity and alignment, adjusting alignment
2
as required to implement atomicity on the host.
2
3
3
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
---
6
tcg/optimize.c | 159 +++++++++++++++++++++++++------------------------
7
tcg/tcg.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
7
1 file changed, 81 insertions(+), 78 deletions(-)
8
1 file changed, 95 insertions(+)
8
9
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/tcg.c b/tcg/tcg.c
10
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
--- a/tcg/tcg.c
12
+++ b/tcg/optimize.c
13
+++ b/tcg/tcg.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
14
return fold_const2(ctx, op);
15
#endif
16
};
17
18
+typedef struct {
19
+ MemOp atom; /* lg2 bits of atomicity required */
20
+ MemOp align; /* lg2 bits of alignment to use */
21
+} TCGAtomAlign;
22
+
23
+static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
24
+ MemOp host_atom, bool allow_two_ops)
25
+ __attribute__((unused));
26
+
27
TCGContext tcg_init_ctx;
28
__thread TCGContext *tcg_ctx;
29
30
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
31
}
15
}
32
}
16
33
17
+static bool fold_brcond2(OptContext *ctx, TCGOp *op)
34
+/**
35
+ * atom_and_align_for_opc:
36
+ * @s: tcg context
37
+ * @opc: memory operation code
38
+ * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
39
+ * @allow_two_ops: true if we are prepared to issue two operations
40
+ *
41
+ * Return the alignment and atomicity to use for the inline fast path
42
+ * for the given memory operation. The alignment may be larger than
43
+ * that specified in @opc, and the correct alignment will be diagnosed
44
+ * by the slow path helper.
45
+ *
46
+ * If @allow_two_ops, the host is prepared to test for 2x alignment,
47
+ * and issue two loads or stores for subalignment.
48
+ */
49
+static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
50
+ MemOp host_atom, bool allow_two_ops)
18
+{
51
+{
19
+ TCGCond cond = op->args[4];
52
+ MemOp align = get_alignment_bits(opc);
20
+ int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
53
+ MemOp size = opc & MO_SIZE;
21
+ TCGArg label = op->args[5];
54
+ MemOp half = size ? size - 1 : 0;
22
+ int inv = 0;
55
+ MemOp atmax;
56
+ MemOp atom;
23
+
57
+
24
+ if (i >= 0) {
58
+ /* When serialized, no further atomicity required. */
25
+ goto do_brcond_const;
59
+ if (s->gen_tb->cflags & CF_PARALLEL) {
60
+ atom = opc & MO_ATOM_MASK;
61
+ } else {
62
+ atom = MO_ATOM_NONE;
26
+ }
63
+ }
27
+
64
+
28
+ switch (cond) {
65
+ switch (atom) {
29
+ case TCG_COND_LT:
66
+ case MO_ATOM_NONE:
30
+ case TCG_COND_GE:
67
+ /* The operation requires no specific atomicity. */
31
+ /*
68
+ atmax = MO_8;
32
+ * Simplify LT/GE comparisons vs zero to a single compare
69
+ break;
33
+ * vs the high word of the input.
70
+
34
+ */
71
+ case MO_ATOM_IFALIGN:
35
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
72
+ atmax = size;
36
+ arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
73
+ break;
37
+ goto do_brcond_high;
74
+
75
+ case MO_ATOM_IFALIGN_PAIR:
76
+ atmax = half;
77
+ break;
78
+
79
+ case MO_ATOM_WITHIN16:
80
+ atmax = size;
81
+ if (size == MO_128) {
82
+ /* Misalignment implies !within16, and therefore no atomicity. */
83
+ } else if (host_atom != MO_ATOM_WITHIN16) {
84
+ /* The host does not implement within16, so require alignment. */
85
+ align = MAX(align, size);
38
+ }
86
+ }
39
+ break;
87
+ break;
40
+
88
+
41
+ case TCG_COND_NE:
89
+ case MO_ATOM_WITHIN16_PAIR:
42
+ inv = 1;
90
+ atmax = size;
43
+ QEMU_FALLTHROUGH;
44
+ case TCG_COND_EQ:
45
+ /*
91
+ /*
46
+ * Simplify EQ/NE comparisons where one of the pairs
92
+ * Misalignment implies !within16, and therefore half atomicity.
47
+ * can be simplified.
93
+ * Any host prepared for two operations can implement this with
94
+ * half alignment.
48
+ */
95
+ */
49
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
96
+ if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
50
+ op->args[2], cond);
97
+ align = MAX(align, half);
51
+ switch (i ^ inv) {
52
+ case 0:
53
+ goto do_brcond_const;
54
+ case 1:
55
+ goto do_brcond_high;
56
+ }
98
+ }
99
+ break;
57
+
100
+
58
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
101
+ case MO_ATOM_SUBALIGN:
59
+ op->args[3], cond);
102
+ atmax = size;
60
+ switch (i ^ inv) {
103
+ if (host_atom != MO_ATOM_SUBALIGN) {
61
+ case 0:
104
+ /* If unaligned but not odd, there are subobjects up to half. */
62
+ goto do_brcond_const;
105
+ if (allow_two_ops) {
63
+ case 1:
106
+ align = MAX(align, half);
64
+ op->opc = INDEX_op_brcond_i32;
107
+ } else {
65
+ op->args[1] = op->args[2];
108
+ align = MAX(align, size);
66
+ op->args[2] = cond;
109
+ }
67
+ op->args[3] = label;
68
+ break;
69
+ }
110
+ }
70
+ break;
111
+ break;
71
+
112
+
72
+ default:
113
+ default:
73
+ break;
114
+ g_assert_not_reached();
115
+ }
74
+
116
+
75
+ do_brcond_high:
117
+ return (TCGAtomAlign){ .atom = atmax, .align = align };
76
+ op->opc = INDEX_op_brcond_i32;
77
+ op->args[0] = op->args[1];
78
+ op->args[1] = op->args[3];
79
+ op->args[2] = cond;
80
+ op->args[3] = label;
81
+ break;
82
+
83
+ do_brcond_const:
84
+ if (i == 0) {
85
+ tcg_op_remove(ctx->tcg, op);
86
+ return true;
87
+ }
88
+ op->opc = INDEX_op_br;
89
+ op->args[0] = label;
90
+ break;
91
+ }
92
+ return false;
93
+}
118
+}
94
+
119
+
95
static bool fold_call(OptContext *ctx, TCGOp *op)
120
/*
96
{
121
* Similarly for qemu_ld/st slow path helpers.
97
TCGContext *s = ctx->tcg;
122
* We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
98
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
99
}
100
break;
101
102
- case INDEX_op_brcond2_i32:
103
- i = do_constant_folding_cond2(&op->args[0], &op->args[2],
104
- op->args[4]);
105
- if (i == 0) {
106
- do_brcond_false:
107
- tcg_op_remove(s, op);
108
- continue;
109
- }
110
- if (i > 0) {
111
- do_brcond_true:
112
- op->opc = opc = INDEX_op_br;
113
- op->args[0] = op->args[5];
114
- break;
115
- }
116
- if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
117
- && arg_is_const(op->args[2])
118
- && arg_info(op->args[2])->val == 0
119
- && arg_is_const(op->args[3])
120
- && arg_info(op->args[3])->val == 0) {
121
- /* Simplify LT/GE comparisons vs zero to a single compare
122
- vs the high word of the input. */
123
- do_brcond_high:
124
- op->opc = opc = INDEX_op_brcond_i32;
125
- op->args[0] = op->args[1];
126
- op->args[1] = op->args[3];
127
- op->args[2] = op->args[4];
128
- op->args[3] = op->args[5];
129
- break;
130
- }
131
- if (op->args[4] == TCG_COND_EQ) {
132
- /* Simplify EQ comparisons where one of the pairs
133
- can be simplified. */
134
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
135
- op->args[0], op->args[2],
136
- TCG_COND_EQ);
137
- if (i == 0) {
138
- goto do_brcond_false;
139
- } else if (i > 0) {
140
- goto do_brcond_high;
141
- }
142
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
143
- op->args[1], op->args[3],
144
- TCG_COND_EQ);
145
- if (i == 0) {
146
- goto do_brcond_false;
147
- } else if (i < 0) {
148
- break;
149
- }
150
- do_brcond_low:
151
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
152
- op->opc = INDEX_op_brcond_i32;
153
- op->args[1] = op->args[2];
154
- op->args[2] = op->args[4];
155
- op->args[3] = op->args[5];
156
- break;
157
- }
158
- if (op->args[4] == TCG_COND_NE) {
159
- /* Simplify NE comparisons where one of the pairs
160
- can be simplified. */
161
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
162
- op->args[0], op->args[2],
163
- TCG_COND_NE);
164
- if (i == 0) {
165
- goto do_brcond_high;
166
- } else if (i > 0) {
167
- goto do_brcond_true;
168
- }
169
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
170
- op->args[1], op->args[3],
171
- TCG_COND_NE);
172
- if (i == 0) {
173
- goto do_brcond_low;
174
- } else if (i > 0) {
175
- goto do_brcond_true;
176
- }
177
- }
178
- break;
179
-
180
default:
181
break;
182
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
CASE_OP_32_64_VEC(andc):
185
done = fold_andc(&ctx, op);
186
break;
187
+ case INDEX_op_brcond2_i32:
188
+ done = fold_brcond2(&ctx, op);
189
+ break;
190
CASE_OP_32_64(ctpop):
191
done = fold_ctpop(&ctx, op);
192
break;
193
--
123
--
194
2.25.1
124
2.34.1
195
196
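
The core decision in atom_and_align_for_opc above can be reduced to a small
standalone sketch. Everything below is an illustrative stand-in rather than
the QEMU definitions, and only the IFALIGN and WITHIN16 policies are
modelled: when the guest asks for more atomicity than the host policy can
deliver, the alignment requirement is raised instead.

#include <stdio.h>

enum { SK_MO_8, SK_MO_16, SK_MO_32, SK_MO_64, SK_MO_128 };  /* lg2 bytes */
enum { SK_ATOM_NONE, SK_ATOM_IFALIGN, SK_ATOM_WITHIN16 };

struct sk_atom_align { int atom, align; };

static struct sk_atom_align
sk_atom_and_align(int size, int req_align, int req_atom, int host_atom)
{
    struct sk_atom_align r = { .atom = SK_MO_8, .align = req_align };

    switch (req_atom) {
    case SK_ATOM_NONE:
        break;                      /* no atomicity required */
    case SK_ATOM_IFALIGN:
        r.atom = size;              /* atomic only when naturally aligned */
        break;
    case SK_ATOM_WITHIN16:
        r.atom = size;
        if (size != SK_MO_128 && host_atom != SK_ATOM_WITHIN16) {
            /* Host cannot promise "atomic within 16 bytes":
               force natural alignment so IFALIGN suffices. */
            if (r.align < size) {
                r.align = size;
            }
        }
        break;
    }
    return r;
}

int main(void)
{
    /* An 8-byte access with no requested alignment, WITHIN16 atomicity,
       on a host that only guarantees IFALIGN. */
    struct sk_atom_align r =
        sk_atom_and_align(SK_MO_64, 0, SK_ATOM_WITHIN16, SK_ATOM_IFALIGN);
    printf("atom = 2^%d bytes, align = 2^%d bytes\n", r.atom, r.align);
    return 0;
}

The real function additionally covers the *_PAIR and SUBALIGN policies and
the allow_two_ops case, as in the diff above.
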
New patch
1
No change to the ultimate load/store routines yet, so some atomicity
2
conditions not yet honored, but plumbs the change to alignment through
3
the relevant functions.
1
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 27 +++++++++++++++------------
9
1 file changed, 15 insertions(+), 12 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
16
int index;
17
int ofs;
18
int seg;
19
+ TCGAtomAlign aa;
20
} HostAddress;
21
22
bool tcg_target_has_memory_bswap(MemOp memop)
23
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
24
{
25
TCGLabelQemuLdst *ldst = NULL;
26
MemOp opc = get_memop(oi);
27
- unsigned a_bits = get_alignment_bits(opc);
28
- unsigned a_mask = (1 << a_bits) - 1;
29
+ unsigned a_mask;
30
+
31
+#ifdef CONFIG_SOFTMMU
32
+ h->index = TCG_REG_L0;
33
+ h->ofs = 0;
34
+ h->seg = 0;
35
+#else
36
+ *h = x86_guest_base;
37
+#endif
38
+ h->base = addrlo;
39
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
40
+ a_mask = (1 << h->aa.align) - 1;
41
42
#ifdef CONFIG_SOFTMMU
43
int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
44
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
45
* copy the address and mask. For lesser alignments, check that we don't
46
* cross pages for the complete access.
47
*/
48
- if (a_bits >= s_bits) {
49
+ if (a_mask >= s_mask) {
50
tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
51
} else {
52
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
53
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
54
/* TLB Hit. */
55
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
56
offsetof(CPUTLBEntry, addend));
57
-
58
- *h = (HostAddress) {
59
- .base = addrlo,
60
- .index = TCG_REG_L0,
61
- };
62
#else
63
- if (a_bits) {
64
+ if (a_mask) {
65
ldst = new_ldst_label(s);
66
67
ldst->is_ld = is_ld;
68
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
69
ldst->label_ptr[0] = s->code_ptr;
70
s->code_ptr += 4;
71
}
72
-
73
- *h = x86_guest_base;
74
- h->base = addrlo;
75
#endif
76
77
return ldst;
78
--
79
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/aarch64/tcg-target.c.inc | 36 ++++++++++++++++++------------------
5
1 file changed, 18 insertions(+), 18 deletions(-)
1
6
7
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/aarch64/tcg-target.c.inc
10
+++ b/tcg/aarch64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ typedef struct {
12
TCGReg base;
13
TCGReg index;
14
TCGType index_ext;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
- unsigned a_mask = (1u << a_bits) - 1;
25
+ unsigned a_mask;
26
+
27
+ h->aa = atom_and_align_for_opc(s, opc,
28
+ have_lse2 ? MO_ATOM_WITHIN16
29
+ : MO_ATOM_IFALIGN,
30
+ false);
31
+ a_mask = (1 << h->aa.align) - 1;
32
33
#ifdef CONFIG_SOFTMMU
34
unsigned s_bits = opc & MO_SIZE;
35
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
36
* bits within the address. For unaligned access, we check that we don't
37
* cross pages using the address of the last byte of the access.
38
*/
39
- if (a_bits >= s_bits) {
40
+ if (a_mask >= s_mask) {
41
x3 = addr_reg;
42
} else {
43
tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
44
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
45
ldst->label_ptr[0] = s->code_ptr;
46
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
47
48
- *h = (HostAddress){
49
- .base = TCG_REG_X1,
50
- .index = addr_reg,
51
- .index_ext = addr_type
52
- };
53
+ h->base = TCG_REG_X1,
54
+ h->index = addr_reg;
55
+ h->index_ext = addr_type;
56
#else
57
if (a_mask) {
58
ldst = new_ldst_label(s);
59
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
60
}
61
62
if (USE_GUEST_BASE) {
63
- *h = (HostAddress){
64
- .base = TCG_REG_GUEST_BASE,
65
- .index = addr_reg,
66
- .index_ext = addr_type
67
- };
68
+ h->base = TCG_REG_GUEST_BASE;
69
+ h->index = addr_reg;
70
+ h->index_ext = addr_type;
71
} else {
72
- *h = (HostAddress){
73
- .base = addr_reg,
74
- .index = TCG_REG_XZR,
75
- .index_ext = TCG_TYPE_I64
76
- };
77
+ h->base = addr_reg;
78
+ h->index = TCG_REG_XZR;
79
+ h->index_ext = TCG_TYPE_I64;
80
}
81
#endif
82
83
--
84
2.34.1
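
The have_lse2 choice above maps to FEAT_LSE2, which guarantees single-copy
atomicity for accesses that stay within an aligned 16-byte granule; that is
what the MO_ATOM_WITHIN16 host policy expresses. A minimal sketch of the
within-16 test, as a standalone illustration rather than QEMU code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Does an access of 'size' bytes at 'addr' stay within a single
   aligned 16-byte granule? */
static bool within_16_byte_granule(uintptr_t addr, unsigned size)
{
    return (addr & 15) + size <= 16;
}

int main(void)
{
    printf("%d %d\n",
           within_16_byte_granule(0x100c, 4),   /* 1: bytes 12..15 */
           within_16_byte_granule(0x100c, 8));  /* 0: crosses into 0x1010 */
    return 0;
}
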
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
No change to the ultimate load/store routines yet, so some atomicity
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
conditions not yet honored, but plumbs the change to alignment through
3
the relevant functions.
4
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
7
---
5
tcg/optimize.c | 39 ++++++++++++++++++++++-----------------
8
tcg/arm/tcg-target.c.inc | 39 ++++++++++++++++++++++-----------------
6
1 file changed, 22 insertions(+), 17 deletions(-)
9
1 file changed, 22 insertions(+), 17 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/tcg/arm/tcg-target.c.inc
11
+++ b/tcg/optimize.c
14
+++ b/tcg/arm/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ typedef struct {
13
return fold_const2(ctx, op);
16
TCGReg base;
14
}
17
int index;
15
18
bool index_scratch;
16
+static bool fold_extract2(OptContext *ctx, TCGOp *op)
19
+ TCGAtomAlign aa;
17
+{
20
} HostAddress;
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
21
19
+ uint64_t v1 = arg_info(op->args[1])->val;
22
bool tcg_target_has_memory_bswap(MemOp memop)
20
+ uint64_t v2 = arg_info(op->args[2])->val;
23
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
21
+ int shr = op->args[3];
24
{
25
TCGLabelQemuLdst *ldst = NULL;
26
MemOp opc = get_memop(oi);
27
- MemOp a_bits = get_alignment_bits(opc);
28
- unsigned a_mask = (1 << a_bits) - 1;
29
+ unsigned a_mask;
22
+
30
+
23
+ if (op->opc == INDEX_op_extract2_i64) {
31
+#ifdef CONFIG_SOFTMMU
24
+ v1 >>= shr;
32
+ *h = (HostAddress){
25
+ v2 <<= 64 - shr;
33
+ .cond = COND_AL,
26
+ } else {
34
+ .base = addrlo,
27
+ v1 = (uint32_t)v1 >> shr;
35
+ .index = TCG_REG_R1,
28
+ v2 = (int32_t)v2 << (32 - shr);
36
+ .index_scratch = true,
29
+ }
37
+ };
30
+ return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
38
+#else
31
+ }
39
+ *h = (HostAddress){
32
+ return false;
40
+ .cond = COND_AL,
33
+}
41
+ .base = addrlo,
42
+ .index = guest_base ? TCG_REG_GUEST_BASE : -1,
43
+ .index_scratch = false,
44
+ };
45
+#endif
34
+
46
+
35
static bool fold_exts(OptContext *ctx, TCGOp *op)
47
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
36
{
48
+ a_mask = (1 << h->aa.align) - 1;
37
return fold_const1(ctx, op);
49
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
50
#ifdef CONFIG_SOFTMMU
39
}
51
int mem_index = get_mmuidx(oi);
40
break;
52
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
41
53
if (TARGET_LONG_BITS == 64) {
42
- CASE_OP_32_64(extract2):
54
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
43
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
55
}
44
- uint64_t v1 = arg_info(op->args[1])->val;
45
- uint64_t v2 = arg_info(op->args[2])->val;
46
- int shr = op->args[3];
47
-
56
-
48
- if (opc == INDEX_op_extract2_i64) {
57
- *h = (HostAddress){
49
- tmp = (v1 >> shr) | (v2 << (64 - shr));
58
- .cond = COND_AL,
50
- } else {
59
- .base = addrlo,
51
- tmp = (int32_t)(((uint32_t)v1 >> shr) |
60
- .index = TCG_REG_R1,
52
- ((uint32_t)v2 << (32 - shr)));
61
- .index_scratch = true,
53
- }
62
- };
54
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
63
#else
55
- continue;
64
if (a_mask) {
56
- }
65
ldst = new_ldst_label(s);
57
- break;
66
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
67
ldst->addrlo_reg = addrlo;
68
ldst->addrhi_reg = addrhi;
69
70
- /* We are expecting a_bits to max out at 7 */
71
+ /* We are expecting alignment to max out at 7 */
72
tcg_debug_assert(a_mask <= 0xff);
73
/* tst addr, #mask */
74
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
75
}
58
-
76
-
59
default:
77
- *h = (HostAddress){
60
break;
78
- .cond = COND_AL,
61
79
- .base = addrlo,
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
- .index = guest_base ? TCG_REG_GUEST_BASE : -1,
63
CASE_OP_32_64(eqv):
81
- .index_scratch = false,
64
done = fold_eqv(&ctx, op);
82
- };
65
break;
83
#endif
66
+ CASE_OP_32_64(extract2):
84
67
+ done = fold_extract2(&ctx, op);
85
return ldst;
68
+ break;
69
CASE_OP_32_64(ext8s):
70
CASE_OP_32_64(ext16s):
71
case INDEX_op_ext32s_i64:
72
--
86
--
73
2.25.1
87
2.34.1
74
75
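
A quick worked example of the fold_extract2 identity added above, as a
standalone sketch rather than the TCG code itself:

#include <stdint.h>
#include <stdio.h>

/* 32-bit extract2: shift v1 right by 'shr' and fill the vacated high bits
   from the low bits of v2.  Assumes 0 < shr < 32; the degenerate shifts
   are emitted as plain moves rather than as this opcode. */
static uint32_t extract2_i32(uint32_t v1, uint32_t v2, int shr)
{
    return (v1 >> shr) | (v2 << (32 - shr));
}

int main(void)
{
    /* 0xAABBCCDD >> 8 = 0x00AABBCC; 0x11223344 << 24 = 0x44000000. */
    printf("0x%08x\n", (unsigned)extract2_i32(0xAABBCCDDu, 0x11223344u, 8));
    /* prints 0x44aabbcc */
    return 0;
}
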
1
Recognize the identity function for division.
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
tcg/optimize.c | 6 +++++-
4
tcg/loongarch64/tcg-target.c.inc | 6 +++++-
9
1 file changed, 5 insertions(+), 1 deletion(-)
5
1 file changed, 5 insertions(+), 1 deletion(-)
10
6
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
7
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
9
--- a/tcg/loongarch64/tcg-target.c.inc
14
+++ b/tcg/optimize.c
10
+++ b/tcg/loongarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
16
12
typedef struct {
17
static bool fold_divide(OptContext *ctx, TCGOp *op)
13
TCGReg base;
14
TCGReg index;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
18
{
20
{
19
- return fold_const2(ctx, op);
21
TCGLabelQemuLdst *ldst = NULL;
20
+ if (fold_const2(ctx, op) ||
22
MemOp opc = get_memop(oi);
21
+ fold_xi_to_x(ctx, op, 1)) {
23
- unsigned a_bits = get_alignment_bits(opc);
22
+ return true;
24
+ MemOp a_bits;
23
+ }
25
+
24
+ return false;
26
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
25
}
27
+ a_bits = h->aa.align;
26
28
27
static bool fold_dup(OptContext *ctx, TCGOp *op)
29
#ifdef CONFIG_SOFTMMU
30
unsigned s_bits = opc & MO_SIZE;
28
--
31
--
29
2.25.1
32
2.34.1
30
31
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
3
---
5
tcg/optimize.c | 25 +++++++++++++++----------
4
tcg/mips/tcg-target.c.inc | 15 +++++++++------
6
1 file changed, 15 insertions(+), 10 deletions(-)
5
1 file changed, 9 insertions(+), 6 deletions(-)
7
6
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
7
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
9
index XXXXXXX..XXXXXXX 100644
8
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
9
--- a/tcg/mips/tcg-target.c.inc
11
+++ b/tcg/optimize.c
10
+++ b/tcg/mips/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
13
return fold_const1(ctx, op);
12
13
typedef struct {
14
TCGReg base;
15
- MemOp align;
16
+ TCGAtomAlign aa;
17
} HostAddress;
18
19
bool tcg_target_has_memory_bswap(MemOp memop)
20
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
21
{
22
TCGLabelQemuLdst *ldst = NULL;
23
MemOp opc = get_memop(oi);
24
- unsigned a_bits = get_alignment_bits(opc);
25
+ MemOp a_bits;
26
unsigned s_bits = opc & MO_SIZE;
27
- unsigned a_mask = (1 << a_bits) - 1;
28
+ unsigned a_mask;
29
TCGReg base;
30
31
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
32
+ a_bits = h->aa.align;
33
+ a_mask = (1 << a_bits) - 1;
34
+
35
#ifdef CONFIG_SOFTMMU
36
unsigned s_mask = (1 << s_bits) - 1;
37
int mem_index = get_mmuidx(oi);
38
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
39
#endif
40
41
h->base = base;
42
- h->align = a_bits;
43
return ldst;
14
}
44
}
15
45
16
+static bool fold_deposit(OptContext *ctx, TCGOp *op)
46
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
17
+{
47
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
48
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
19
+ uint64_t t1 = arg_info(op->args[1])->val;
49
20
+ uint64_t t2 = arg_info(op->args[2])->val;
50
- if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
21
+
51
+ if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) {
22
+ t1 = deposit64(t1, op->args[3], op->args[4], t2);
52
tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
53
} else {
24
+ }
54
tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type);
25
+ return false;
55
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
26
+}
56
27
+
57
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
28
static bool fold_divide(OptContext *ctx, TCGOp *op)
58
29
{
59
- if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
30
return fold_const2(ctx, op);
60
+ if (use_mips32r6_instructions || h.aa.align >= (opc & MO_SIZE)) {
31
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc);
32
}
62
} else {
33
break;
63
tcg_out_qemu_st_unalign(s, datalo, datahi, h.base, opc);
34
35
- CASE_OP_32_64(deposit):
36
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
37
- tmp = deposit64(arg_info(op->args[1])->val,
38
- op->args[3], op->args[4],
39
- arg_info(op->args[2])->val);
40
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
41
- continue;
42
- }
43
- break;
44
-
45
default:
46
break;
47
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
CASE_OP_32_64(ctpop):
50
done = fold_ctpop(&ctx, op);
51
break;
52
+ CASE_OP_32_64(deposit):
53
+ done = fold_deposit(&ctx, op);
54
+ break;
55
CASE_OP_32_64(div):
56
CASE_OP_32_64(divu):
57
done = fold_divide(&ctx, op);
58
--
64
--
59
2.25.1
65
2.34.1
60
61
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/ppc/tcg-target.c.inc | 19 ++++++++++++++++++-
5
1 file changed, 18 insertions(+), 1 deletion(-)
1
6
7
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/ppc/tcg-target.c.inc
10
+++ b/tcg/ppc/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
12
typedef struct {
13
TCGReg base;
14
TCGReg index;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
+ MemOp a_bits;
25
+
26
+ /*
27
+ * Book II, Section 1.4, Single-Copy Atomicity, specifies:
28
+ *
29
+ * Before 3.0, "An access that is not atomic is performed as a set of
30
+ * smaller disjoint atomic accesses. In general, the number and alignment
31
+ * of these accesses are implementation-dependent." Thus MO_ATOM_IFALIGN.
32
+ *
33
+ * As of 3.0, "the non-atomic access is performed as described in
34
+ * the corresponding list", which matches MO_ATOM_SUBALIGN.
35
+ */
36
+ h->aa = atom_and_align_for_opc(s, opc,
37
+ have_isa_3_00 ? MO_ATOM_SUBALIGN
38
+ : MO_ATOM_IFALIGN,
39
+ false);
40
+ a_bits = h->aa.align;
41
42
#ifdef CONFIG_SOFTMMU
43
int mem_index = get_mmuidx(oi);
44
--
45
2.34.1
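
The practical difference between the two quoted behaviours can be pictured
with a small standalone model; the function and policy names below are
illustrative, not QEMU's. Under IFALIGN a misaligned access promises nothing
beyond byte atomicity, while under SUBALIGN it is performed as naturally
aligned pieces, so the guaranteed atomic unit is the alignment of the
address, capped by the access size:

#include <stdint.h>
#include <stdio.h>

/* Largest chunk guaranteed atomic for an access of 'size' bytes at 'addr'
   (size is assumed to be a power of two). */
static unsigned atomic_unit_ifalign(uintptr_t addr, unsigned size)
{
    return (addr % size) ? 1 : size;        /* all or nothing */
}

static unsigned atomic_unit_subalign(uintptr_t addr, unsigned size)
{
    unsigned unit = 1;
    while (unit < size && addr % (unit * 2) == 0) {
        unit *= 2;                          /* aligned sub-pieces */
    }
    return unit;
}

int main(void)
{
    uintptr_t addr = 0x1004;                /* 4-aligned, not 8-aligned */
    printf("8 bytes at %#lx: ifalign=%u, subalign=%u\n",
           (unsigned long)addr,
           atomic_unit_ifalign(addr, 8),
           atomic_unit_subalign(addr, 8));  /* ifalign=1, subalign=4 */
    return 0;
}
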
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/riscv/tcg-target.c.inc | 13 ++++++++-----
5
1 file changed, 8 insertions(+), 5 deletions(-)
1
6
7
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/riscv/tcg-target.c.inc
10
+++ b/tcg/riscv/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
12
{
13
TCGLabelQemuLdst *ldst = NULL;
14
MemOp opc = get_memop(oi);
15
- unsigned a_bits = get_alignment_bits(opc);
16
- unsigned a_mask = (1u << a_bits) - 1;
17
+ TCGAtomAlign aa;
18
+ unsigned a_mask;
19
+
20
+ aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
21
+ a_mask = (1u << aa.align) - 1;
22
23
#ifdef CONFIG_SOFTMMU
24
unsigned s_bits = opc & MO_SIZE;
25
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
26
* cross pages using the address of the last byte of the access.
27
*/
28
addr_adj = addr_reg;
29
- if (a_bits < s_bits) {
30
+ if (a_mask < s_mask) {
31
addr_adj = TCG_REG_TMP0;
32
tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
33
addr_adj, addr_reg, s_mask - a_mask);
34
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
35
ldst->oi = oi;
36
ldst->addrlo_reg = addr_reg;
37
38
- /* We are expecting a_bits max 7, so we can always use andi. */
39
- tcg_debug_assert(a_bits < 12);
40
+ /* We are expecting alignment max 7, so we can always use andi. */
41
+ tcg_debug_assert(a_mask == sextreg(a_mask, 0, 12));
42
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
43
44
ldst->label_ptr[0] = s->code_ptr;
45
--
46
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/s390x/tcg-target.c.inc | 11 +++++++----
5
1 file changed, 7 insertions(+), 4 deletions(-)
1
6
7
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/s390x/tcg-target.c.inc
10
+++ b/tcg/s390x/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ typedef struct {
12
TCGReg base;
13
TCGReg index;
14
int disp;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
- unsigned a_mask = (1u << a_bits) - 1;
25
+ unsigned a_mask;
26
+
27
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
28
+ a_mask = (1 << h->aa.align) - 1;
29
30
#ifdef CONFIG_SOFTMMU
31
unsigned s_bits = opc & MO_SIZE;
32
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
33
* bits within the address. For unaligned access, we check that we don't
34
* cross pages using the address of the last byte of the access.
35
*/
36
- a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
37
+ a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
38
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
39
if (a_off == 0) {
40
tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
41
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
42
ldst->addrlo_reg = addr_reg;
43
44
/* We are expecting a_bits to max out at 7, much lower than TMLL. */
45
- tcg_debug_assert(a_bits < 16);
46
+ tcg_debug_assert(a_mask <= 0xffff);
47
tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
48
49
tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
50
--
51
2.34.1
New patch
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
---
4
tcg/sparc64/tcg-target.c.inc | 21 ++++++++++++---------
5
1 file changed, 12 insertions(+), 9 deletions(-)
1
6
7
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
8
index XXXXXXX..XXXXXXX 100644
9
--- a/tcg/sparc64/tcg-target.c.inc
10
+++ b/tcg/sparc64/tcg-target.c.inc
11
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
12
typedef struct {
13
TCGReg base;
14
TCGReg index;
15
+ TCGAtomAlign aa;
16
} HostAddress;
17
18
bool tcg_target_has_memory_bswap(MemOp memop)
19
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
20
{
21
TCGLabelQemuLdst *ldst = NULL;
22
MemOp opc = get_memop(oi);
23
- unsigned a_bits = get_alignment_bits(opc);
24
- unsigned s_bits = opc & MO_SIZE;
25
+ MemOp s_bits = opc & MO_SIZE;
26
unsigned a_mask;
27
28
/* We don't support unaligned accesses. */
29
- a_bits = MAX(a_bits, s_bits);
30
- a_mask = (1u << a_bits) - 1;
31
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
32
+ h->aa.align = MAX(h->aa.align, s_bits);
33
+ a_mask = (1u << h->aa.align) - 1;
34
35
#ifdef CONFIG_SOFTMMU
36
int mem_index = get_mmuidx(oi);
37
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
38
cc = TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC;
39
tcg_out_bpcc0(s, COND_NE, BPCC_PN | cc, 0);
40
#else
41
- if (a_bits != s_bits) {
42
- /*
43
- * Test for at least natural alignment, and defer
44
- * everything else to the helper functions.
45
- */
46
+ /*
47
+ * If the size equals the required alignment, we can skip the test
48
+ * and allow host SIGBUS to deliver SIGBUS to the guest.
49
+ * Otherwise, test for at least natural alignment and defer
50
+ * everything else to the helper functions.
51
+ */
52
+ if (s_bits != get_alignment_bits(opc)) {
53
tcg_debug_assert(check_fit_tl(a_mask, 13));
54
tcg_out_arithi(s, TCG_REG_G0, addr_reg, a_mask, ARITH_ANDCC);
--
2.34.1
New patch
1
Use the FPU to perform 64-bit loads and stores.
1
2
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/i386/tcg-target.c.inc | 44 +++++++++++++++++++++++++++++++++------
7
1 file changed, 38 insertions(+), 6 deletions(-)
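For illustration only, the same idea as a minimal host-C sketch with GCC inline assembly (the helper name is invented and this is not the code the backend emits): fild of an m64 operand is a single 8-byte load, and the 80-bit x87 format represents every 64-bit integer exactly, so the round trip through the FPU preserves the value.

    #include <stdint.h>

    /* Sketch: atomic 8-byte load on 32-bit x86 via the x87 FPU,
     * mirroring the FILD/FISTP pair used by this patch. */
    static uint64_t load_u64_via_x87(const uint64_t *p)
    {
        uint64_t out;

        asm volatile("fildll %1\n\t"    /* one 8-byte read from *p   */
                     "fistpll %0"       /* spill to a local variable */
                     : "=m"(out)
                     : "m"(*p)
                     : "st");
        return out;                     /* returned in edx:eax       */
    }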
8
9
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/i386/tcg-target.c.inc
12
+++ b/tcg/i386/tcg-target.c.inc
13
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
14
#define OPC_GRP5 (0xff)
15
#define OPC_GRP14 (0x73 | P_EXT | P_DATA16)
16
17
+#define OPC_ESCDF (0xdf)
18
+#define ESCDF_FILD_m64 5
19
+#define ESCDF_FISTP_m64 7
20
+
21
/* Group 1 opcode extensions for 0x80-0x83.
22
These are also used as modifiers for OPC_ARITH. */
23
#define ARITH_ADD 0
24
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
25
datalo = datahi;
26
datahi = t;
27
}
28
- if (h.base == datalo || h.index == datalo) {
29
+ if (h.aa.atom == MO_64) {
30
+ /*
31
+ * Atomicity requires that we use a single 8-byte load.
32
+ * For simplicity and code size, always use the FPU for this.
33
+ * Similar insns using SSE/AVX are merely larger.
34
+ * Load from memory in one go, then store back to the stack,
35
+ * from whence we can load into the correct integer regs.
36
+ */
37
+ tcg_out_modrm_sib_offset(s, OPC_ESCDF + h.seg, ESCDF_FILD_m64,
38
+ h.base, h.index, 0, h.ofs);
39
+ tcg_out_modrm_offset(s, OPC_ESCDF, ESCDF_FISTP_m64, TCG_REG_ESP, 0);
40
+ tcg_out_modrm_offset(s, movop, datalo, TCG_REG_ESP, 0);
41
+ tcg_out_modrm_offset(s, movop, datahi, TCG_REG_ESP, 4);
42
+ } else if (h.base == datalo || h.index == datalo) {
43
tcg_out_modrm_sib_offset(s, OPC_LEA, datahi,
44
h.base, h.index, 0, h.ofs);
45
tcg_out_modrm_offset(s, movop + h.seg, datalo, datahi, 0);
46
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
47
if (TCG_TARGET_REG_BITS == 64) {
48
tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
49
h.base, h.index, 0, h.ofs);
50
+ break;
51
+ }
52
+ if (use_movbe) {
53
+ TCGReg t = datalo;
54
+ datalo = datahi;
55
+ datahi = t;
56
+ }
57
+ if (h.aa.atom == MO_64) {
58
+ /*
59
+ * Atomicity requires that we use one 8-byte store.
60
+ * For simplicity and code size, always use the FPU for this.
61
+ * Similar insns using SSE/AVX are merely larger.
62
+ * Assemble the 8-byte quantity in required endianness
63
+ * on the stack, load to coproc unit, and store.
64
+ */
65
+ tcg_out_modrm_offset(s, movop, datalo, TCG_REG_ESP, 0);
66
+ tcg_out_modrm_offset(s, movop, datahi, TCG_REG_ESP, 4);
67
+ tcg_out_modrm_offset(s, OPC_ESCDF, ESCDF_FILD_m64, TCG_REG_ESP, 0);
68
+ tcg_out_modrm_sib_offset(s, OPC_ESCDF + h.seg, ESCDF_FISTP_m64,
69
+ h.base, h.index, 0, h.ofs);
70
} else {
71
- if (use_movbe) {
72
- TCGReg t = datalo;
73
- datalo = datahi;
74
- datahi = t;
75
- }
76
tcg_out_modrm_sib_offset(s, movop + h.seg, datalo,
77
h.base, h.index, 0, h.ofs);
78
tcg_out_modrm_sib_offset(s, movop + h.seg, datahi,
--
2.34.1
1
Add two additional helpers, fold_add2_i32 and fold_sub2_i32
1
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
2
which will not be simple wrappers forever.
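As a standalone reference for what the constant-fold case computes (plain C outside TCG, names invented for the example): the 32-bit halves are widened to 64 bits, combined, and the result split back into the two destination halves.

    #include <stdbool.h>
    #include <stdint.h>

    static void fold_addsub2_example(uint32_t al, uint32_t ah,
                                     uint32_t bl, uint32_t bh, bool add,
                                     uint32_t *rl, uint32_t *rh)
    {
        uint64_t a = ((uint64_t)ah << 32) | al;
        uint64_t b = ((uint64_t)bh << 32) | bl;
        uint64_t r = add ? a + b : a - b;

        *rl = (uint32_t)r;           /* becomes the movi for args[0] */
        *rh = (uint32_t)(r >> 32);   /* becomes the movi for args[1] */
    }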
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
tcg/optimize.c | 70 +++++++++++++++++++++++++++++++-------------------
4
tcg/i386/tcg-target.h | 3 +-
9
1 file changed, 44 insertions(+), 26 deletions(-)
5
tcg/i386/tcg-target.c.inc | 181 +++++++++++++++++++++++++++++++++++++-
6
2 files changed, 180 insertions(+), 4 deletions(-)
10
7
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
10
--- a/tcg/i386/tcg-target.h
14
+++ b/tcg/optimize.c
11
+++ b/tcg/i386/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
12
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
16
return fold_const2(ctx, op);
13
#define TCG_TARGET_HAS_qemu_st8_i32 1
17
}
14
#endif
18
15
19
+static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
16
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
20
+{
17
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
21
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
18
+ (TCG_TARGET_REG_BITS == 64 && have_atomic16)
22
+ arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
19
23
+ uint32_t al = arg_info(op->args[2])->val;
20
/* We do not support older SSE systems, only beginning with AVX1. */
24
+ uint32_t ah = arg_info(op->args[3])->val;
21
#define TCG_TARGET_HAS_v64 have_avx1
25
+ uint32_t bl = arg_info(op->args[4])->val;
22
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
26
+ uint32_t bh = arg_info(op->args[5])->val;
23
index XXXXXXX..XXXXXXX 100644
27
+ uint64_t a = ((uint64_t)ah << 32) | al;
24
--- a/tcg/i386/tcg-target.c.inc
28
+ uint64_t b = ((uint64_t)bh << 32) | bl;
25
+++ b/tcg/i386/tcg-target.c.inc
29
+ TCGArg rl, rh;
26
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
30
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
27
#endif
31
+
28
};
32
+ if (add) {
29
33
+ a += b;
30
+#define TCG_TMP_VEC TCG_REG_XMM5
34
+ } else {
31
+
35
+ a -= b;
32
static const int tcg_target_call_iarg_regs[] = {
36
+ }
33
#if TCG_TARGET_REG_BITS == 64
37
+
34
#if defined(_WIN64)
38
+ rl = op->args[0];
35
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
39
+ rh = op->args[1];
36
#define OPC_PCMPGTW (0x65 | P_EXT | P_DATA16)
40
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
37
#define OPC_PCMPGTD (0x66 | P_EXT | P_DATA16)
41
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
38
#define OPC_PCMPGTQ (0x37 | P_EXT38 | P_DATA16)
39
+#define OPC_PEXTRD (0x16 | P_EXT3A | P_DATA16)
40
+#define OPC_PINSRD (0x22 | P_EXT3A | P_DATA16)
41
#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
42
#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
43
#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)
44
@@ -XXX,XX +XXX,XX @@ typedef struct {
45
46
bool tcg_target_has_memory_bswap(MemOp memop)
47
{
48
- return have_movbe;
49
+ TCGAtomAlign aa;
50
+
51
+ if (!have_movbe) {
52
+ return false;
53
+ }
54
+ if ((memop & MO_SIZE) <= MO_64) {
42
+ return true;
55
+ return true;
43
+ }
56
+ }
44
+ return false;
57
+
58
+ /*
59
+ * Reject 16-byte memop with 16-byte atomicity, i.e. VMOVDQA,
60
+ * but do allow a pair of 64-bit operations, i.e. MOVBEQ.
61
+ */
62
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
63
+ return aa.atom <= MO_64;
64
}
65
66
/*
67
@@ -XXX,XX +XXX,XX @@ static const TCGLdstHelperParam ldst_helper_param = {
68
static const TCGLdstHelperParam ldst_helper_param = { };
69
#endif
70
71
+static void tcg_out_vec_to_pair(TCGContext *s, TCGType type,
72
+ TCGReg l, TCGReg h, TCGReg v)
73
+{
74
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
75
+
76
+ /* vpmov{d,q} %v, %l */
77
+ tcg_out_vex_modrm(s, OPC_MOVD_EyVy + rexw, v, 0, l);
78
+ /* vpextr{d,q} $1, %v, %h */
79
+ tcg_out_vex_modrm(s, OPC_PEXTRD + rexw, v, 0, h);
80
+ tcg_out8(s, 1);
45
+}
81
+}
46
+
82
+
47
+static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
83
+static void tcg_out_pair_to_vec(TCGContext *s, TCGType type,
84
+ TCGReg v, TCGReg l, TCGReg h)
48
+{
85
+{
49
+ return fold_addsub2_i32(ctx, op, true);
86
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
87
+
88
+ /* vmov{d,q} %l, %v */
89
+ tcg_out_vex_modrm(s, OPC_MOVD_VyEy + rexw, v, 0, l);
90
+ /* vpinsr{d,q} $1, %h, %v, %v */
91
+ tcg_out_vex_modrm(s, OPC_PINSRD + rexw, v, v, h);
92
+ tcg_out8(s, 1);
50
+}
93
+}
51
+
94
+
52
static bool fold_and(OptContext *ctx, TCGOp *op)
95
/*
96
* Generate code for the slow path for a load at the end of block
97
*/
98
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
53
{
99
{
54
return fold_const2(ctx, op);
100
TCGLabelQemuLdst *ldst = NULL;
55
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
101
MemOp opc = get_memop(oi);
56
return fold_const2(ctx, op);
102
+ MemOp s_bits = opc & MO_SIZE;
57
}
103
unsigned a_mask;
58
104
59
+static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
105
#ifdef CONFIG_SOFTMMU
60
+{
106
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
61
+ return fold_addsub2_i32(ctx, op, false);
107
*h = x86_guest_base;
62
+}
108
#endif
63
+
109
h->base = addrlo;
64
static bool fold_xor(OptContext *ctx, TCGOp *op)
110
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
65
{
111
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
66
return fold_const2(ctx, op);
112
a_mask = (1 << h->aa.align) - 1;
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
68
}
114
#ifdef CONFIG_SOFTMMU
69
break;
115
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
70
116
TCGType tlbtype = TCG_TYPE_I32;
71
- case INDEX_op_add2_i32:
117
int trexw = 0, hrexw = 0, tlbrexw = 0;
72
- case INDEX_op_sub2_i32:
118
unsigned mem_index = get_mmuidx(oi);
73
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
119
- unsigned s_bits = opc & MO_SIZE;
74
- && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
120
unsigned s_mask = (1 << s_bits) - 1;
75
- uint32_t al = arg_info(op->args[2])->val;
121
target_ulong tlb_mask;
76
- uint32_t ah = arg_info(op->args[3])->val;
122
77
- uint32_t bl = arg_info(op->args[4])->val;
123
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
78
- uint32_t bh = arg_info(op->args[5])->val;
124
h.base, h.index, 0, h.ofs + 4);
79
- uint64_t a = ((uint64_t)ah << 32) | al;
125
}
80
- uint64_t b = ((uint64_t)bh << 32) | bl;
126
break;
81
- TCGArg rl, rh;
127
+
82
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
128
+ case MO_128:
83
-
129
+ {
84
- if (opc == INDEX_op_add2_i32) {
130
+ TCGLabel *l1 = NULL, *l2 = NULL;
85
- a += b;
131
+ bool use_pair = h.aa.atom < MO_128;
86
- } else {
132
+
87
- a -= b;
133
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
88
- }
134
+
89
-
135
+ if (!use_pair) {
90
- rl = op->args[0];
136
+ tcg_debug_assert(!use_movbe);
91
- rh = op->args[1];
137
+ /*
92
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
138
+ * Atomicity requires that we use VMOVDQA.
93
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
139
+ * If we've already checked for 16-byte alignment, that's all
94
- continue;
140
+ * we need. If we arrive here with lesser alignment, then we
95
- }
141
+ * have determined that less than 16-byte alignment can be
96
- break;
142
+ * satisfied with two 8-byte loads.
97
143
+ */
98
default:
144
+ if (h.aa.align < MO_128) {
99
break;
145
+ use_pair = true;
100
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
146
+ l1 = gen_new_label();
101
CASE_OP_32_64_VEC(add):
147
+ l2 = gen_new_label();
102
done = fold_add(&ctx, op);
148
+
103
break;
149
+ tcg_out_testi(s, h.base, 15);
104
+ case INDEX_op_add2_i32:
150
+ tcg_out_jxx(s, JCC_JNE, l2, true);
105
+ done = fold_add2_i32(&ctx, op);
151
+ }
106
+ break;
152
+
107
CASE_OP_32_64_VEC(and):
153
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_VxWx + h.seg,
108
done = fold_and(&ctx, op);
154
+ TCG_TMP_VEC, 0,
109
break;
155
+ h.base, h.index, 0, h.ofs);
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
156
+ tcg_out_vec_to_pair(s, TCG_TYPE_I64, datalo,
111
CASE_OP_32_64_VEC(sub):
157
+ datahi, TCG_TMP_VEC);
112
done = fold_sub(&ctx, op);
158
+
113
break;
159
+ if (use_pair) {
114
+ case INDEX_op_sub2_i32:
160
+ tcg_out_jxx(s, JCC_JMP, l1, true);
115
+ done = fold_sub2_i32(&ctx, op);
161
+ tcg_out_label(s, l2);
116
+ break;
162
+ }
117
CASE_OP_32_64_VEC(xor):
163
+ }
118
done = fold_xor(&ctx, op);
164
+ if (use_pair) {
119
break;
165
+ if (use_movbe) {
166
+ TCGReg t = datalo;
167
+ datalo = datahi;
168
+ datahi = t;
169
+ }
170
+ if (h.base == datalo || h.index == datalo) {
171
+ tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, datahi,
172
+ h.base, h.index, 0, h.ofs);
173
+ tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
174
+ datalo, datahi, 0);
175
+ tcg_out_modrm_offset(s, movop + P_REXW + h.seg,
176
+ datahi, datahi, 8);
177
+ } else {
178
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
179
+ h.base, h.index, 0, h.ofs);
180
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
181
+ h.base, h.index, 0, h.ofs + 8);
182
+ }
183
+ }
184
+ if (l1) {
185
+ tcg_out_label(s, l1);
186
+ }
187
+ }
188
+ break;
189
+
190
default:
191
g_assert_not_reached();
192
}
193
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
194
h.base, h.index, 0, h.ofs + 4);
195
}
196
break;
197
+
198
+ case MO_128:
199
+ {
200
+ TCGLabel *l1 = NULL, *l2 = NULL;
201
+ bool use_pair = h.aa.atom < MO_128;
202
+
203
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
204
+
205
+ if (!use_pair) {
206
+ tcg_debug_assert(!use_movbe);
207
+ /*
208
+ * Atomicity requires that we use use VMOVDQA.
209
+ * If we've already checked for 16-byte alignment, that's all
210
+ * we need. If we arrive here with lesser alignment, then we
211
+ * have determined that less that 16-byte alignment can be
212
+ * satisfied with two 8-byte loads.
213
+ */
214
+ if (h.aa.align < MO_128) {
215
+ use_pair = true;
216
+ l1 = gen_new_label();
217
+ l2 = gen_new_label();
218
+
219
+ tcg_out_testi(s, h.base, 15);
220
+ tcg_out_jxx(s, JCC_JNE, l2, true);
221
+ }
222
+
223
+ tcg_out_pair_to_vec(s, TCG_TYPE_I64, TCG_TMP_VEC,
224
+ datalo, datahi);
225
+ tcg_out_vex_modrm_sib_offset(s, OPC_MOVDQA_WxVx + h.seg,
226
+ TCG_TMP_VEC, 0,
227
+ h.base, h.index, 0, h.ofs);
228
+
229
+ if (use_pair) {
230
+ tcg_out_jxx(s, JCC_JMP, l1, true);
231
+ tcg_out_label(s, l2);
232
+ }
233
+ }
234
+ if (use_pair) {
235
+ if (use_movbe) {
236
+ TCGReg t = datalo;
237
+ datalo = datahi;
238
+ datahi = t;
239
+ }
240
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datalo,
241
+ h.base, h.index, 0, h.ofs);
242
+ tcg_out_modrm_sib_offset(s, movop + P_REXW + h.seg, datahi,
243
+ h.base, h.index, 0, h.ofs + 8);
244
+ }
245
+ if (l1) {
246
+ tcg_out_label(s, l1);
247
+ }
248
+ }
249
+ break;
250
+
251
default:
252
g_assert_not_reached();
253
}
254
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
255
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
256
}
257
break;
258
+ case INDEX_op_qemu_ld_i128:
259
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
260
+ tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
261
+ break;
262
case INDEX_op_qemu_st_i32:
263
case INDEX_op_qemu_st8_i32:
264
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
265
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
266
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
267
}
268
break;
269
+ case INDEX_op_qemu_st_i128:
270
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
271
+ tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
272
+ break;
273
274
OP_32_64(mulu2):
275
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
276
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
277
: TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(L, L, L)
278
: C_O0_I4(L, L, L, L));
279
280
+ case INDEX_op_qemu_ld_i128:
281
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
282
+ return C_O2_I1(r, r, L);
283
+ case INDEX_op_qemu_st_i128:
284
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
285
+ return C_O0_I3(L, L, L);
286
+
287
case INDEX_op_brcond2_i32:
288
return C_O0_I4(r, r, ri, ri);
289
290
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
291
292
s->reserved_regs = 0;
293
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
294
+ tcg_regset_set_reg(s->reserved_regs, TCG_TMP_VEC);
295
#ifdef _WIN64
296
/* These are call saved, and we don't save them, so don't use them. */
297
tcg_regset_set_reg(s->reserved_regs, TCG_REG_XMM6);
--
2.25.1
--
2.34.1
1
Pull the "op r, a, a => movi r, 0" optimization into a function,
1
We will need to allocate a second general-purpose temporary.
2
and use it in the outer opcode fold functions.
2
Rename the existing temps to add a distinguishing number.
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 41 ++++++++++++++++++++++++-----------------
7
tcg/aarch64/tcg-target.c.inc | 50 ++++++++++++++++++------------------
9
1 file changed, 24 insertions(+), 17 deletions(-)
8
1 file changed, 25 insertions(+), 25 deletions(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/optimize.c
13
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
16
return false;
15
bool have_lse;
16
bool have_lse2;
17
18
-#define TCG_REG_TMP TCG_REG_X30
19
-#define TCG_VEC_TMP TCG_REG_V31
20
+#define TCG_REG_TMP0 TCG_REG_X30
21
+#define TCG_VEC_TMP0 TCG_REG_V31
22
23
#ifndef CONFIG_SOFTMMU
24
/* Note that XZR cannot be encoded in the address base register slot,
25
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
26
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
27
TCGReg r, TCGReg base, intptr_t offset)
28
{
29
- TCGReg temp = TCG_REG_TMP;
30
+ TCGReg temp = TCG_REG_TMP0;
31
32
if (offset < -0xffffff || offset > 0xffffff) {
33
tcg_out_movi(s, TCG_TYPE_PTR, temp, offset);
34
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
35
}
36
37
/* Worst-case scenario, move offset to temp register, use reg offset. */
38
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
39
- tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
40
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, offset);
41
+ tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP0);
17
}
42
}
18
43
19
+/* If the binary operation has both arguments equal, fold to @i. */
44
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
20
+static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
45
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *target)
21
+{
46
if (offset == sextract64(offset, 0, 26)) {
22
+ if (args_are_copies(op->args[1], op->args[2])) {
47
tcg_out_insn(s, 3206, BL, offset);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
48
} else {
24
+ }
49
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
25
+ return false;
50
- tcg_out_insn(s, 3207, BLR, TCG_REG_TMP);
26
+}
51
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
27
+
52
+ tcg_out_insn(s, 3207, BLR, TCG_REG_TMP0);
28
/*
53
}
29
* These outermost fold_<op> functions are sorted alphabetically.
54
}
30
*/
55
31
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
56
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
32
57
AArch64Insn insn;
33
static bool fold_andc(OptContext *ctx, TCGOp *op)
58
59
if (rl == ah || (!const_bh && rl == bh)) {
60
- rl = TCG_REG_TMP;
61
+ rl = TCG_REG_TMP0;
62
}
63
64
if (const_bl) {
65
@@ -XXX,XX +XXX,XX @@ static void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
66
possibility of adding 0+const in the low part, and the
67
immediate add instructions encode XSP not XZR. Don't try
68
anything more elaborate here than loading another zero. */
69
- al = TCG_REG_TMP;
70
+ al = TCG_REG_TMP0;
71
tcg_out_movi(s, ext, al, 0);
72
}
73
tcg_out_insn_3401(s, insn, ext, rl, al, bl);
74
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
34
{
75
{
35
- return fold_const2(ctx, op);
76
TCGReg a1 = a0;
36
+ if (fold_const2(ctx, op) ||
77
if (is_ctz) {
37
+ fold_xx_to_i(ctx, op, 0)) {
78
- a1 = TCG_REG_TMP;
38
+ return true;
79
+ a1 = TCG_REG_TMP0;
39
+ }
80
tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
40
+ return false;
81
}
82
if (const_b && b == (ext ? 64 : 32)) {
83
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
84
AArch64Insn sel = I3506_CSEL;
85
86
tcg_out_cmp(s, ext, a0, 0, 1);
87
- tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
88
+ tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP0, a1);
89
90
if (const_b) {
91
if (b == -1) {
92
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
93
b = d;
94
}
95
}
96
- tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
97
+ tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP0, b, TCG_COND_NE);
98
}
41
}
99
}
42
100
43
static bool fold_brcond(OptContext *ctx, TCGOp *op)
101
@@ -XXX,XX +XXX,XX @@ bool tcg_target_has_memory_bswap(MemOp memop)
44
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
45
46
static bool fold_sub(OptContext *ctx, TCGOp *op)
47
{
48
- return fold_const2(ctx, op);
49
+ if (fold_const2(ctx, op) ||
50
+ fold_xx_to_i(ctx, op, 0)) {
51
+ return true;
52
+ }
53
+ return false;
54
}
102
}
55
103
56
static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
104
static const TCGLdstHelperParam ldst_helper_param = {
57
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
105
- .ntmp = 1, .tmp = { TCG_REG_TMP }
58
106
+ .ntmp = 1, .tmp = { TCG_REG_TMP0 }
59
static bool fold_xor(OptContext *ctx, TCGOp *op)
107
};
60
{
108
61
- return fold_const2(ctx, op);
109
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
62
+ if (fold_const2(ctx, op) ||
110
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto_tb(TCGContext *s, int which)
63
+ fold_xx_to_i(ctx, op, 0)) {
111
64
+ return true;
112
set_jmp_insn_offset(s, which);
65
+ }
113
tcg_out32(s, I3206_B);
66
+ return false;
114
- tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
115
+ tcg_out_insn(s, 3207, BR, TCG_REG_TMP0);
116
set_jmp_reset_offset(s, which);
67
}
117
}
68
118
69
/* Propagate constants and copies, fold constant expressions. */
119
@@ -XXX,XX +XXX,XX @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
120
ptrdiff_t i_offset = i_addr - jmp_rx;
71
break;
121
122
/* Note that we asserted this in range in tcg_out_goto_tb. */
123
- insn = deposit32(I3305_LDR | TCG_REG_TMP, 5, 19, i_offset >> 2);
124
+ insn = deposit32(I3305_LDR | TCG_REG_TMP0, 5, 19, i_offset >> 2);
125
}
126
qatomic_set((uint32_t *)jmp_rw, insn);
127
flush_idcache_range(jmp_rx, jmp_rw, 4);
128
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
129
130
case INDEX_op_rem_i64:
131
case INDEX_op_rem_i32:
132
- tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
133
- tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
134
+ tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP0, a1, a2);
135
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
136
break;
137
case INDEX_op_remu_i64:
138
case INDEX_op_remu_i32:
139
- tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
140
- tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
141
+ tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP0, a1, a2);
142
+ tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP0, a2, a1);
143
break;
144
145
case INDEX_op_shl_i64:
146
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
147
if (c2) {
148
tcg_out_rotl(s, ext, a0, a1, a2);
149
} else {
150
- tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
151
- tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
152
+ tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP0, TCG_REG_XZR, a2);
153
+ tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP0);
72
}
154
}
73
155
break;
74
- /* Simplify expression for "op r, a, a => movi r, 0" cases */
156
75
- switch (opc) {
157
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
76
- CASE_OP_32_64_VEC(andc):
158
break;
77
- CASE_OP_32_64_VEC(sub):
159
}
78
- CASE_OP_32_64_VEC(xor):
160
}
79
- if (args_are_copies(op->args[1], op->args[2])) {
161
- tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
80
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
162
- a2 = TCG_VEC_TMP;
81
- continue;
163
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP0, 0);
82
- }
164
+ a2 = TCG_VEC_TMP0;
83
- break;
165
}
84
- default:
166
if (is_scalar) {
85
- break;
167
insn = cmp_scalar_insn[cond];
86
- }
168
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
87
-
169
s->reserved_regs = 0;
88
/*
170
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
89
* Process each opcode.
171
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
90
* Sorted alphabetically by opcode as much as possible.
172
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
173
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
174
- tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP);
175
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
176
+ tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
177
}
178
179
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
--
2.25.1
--
2.34.1
1
From: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
1
Use LDXP+STXP when LSE2 is not present and 16-byte atomicity is required,
2
and LDP/STP otherwise. This requires allocating a second general-purpose
3
temporary, as Rs cannot overlap Rn in STXP.
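For reference, the compiler-level analogue of that loop in C with GCC inline assembly (illustrative sketch only; the type and function names are invented, and the backend emits this directly rather than via inline asm). A successful STXP proves that the preceding LDXP read a single-copy-atomic 16-byte value, and the early-clobber constraints keep the status register distinct from the data and address registers, which is the Rs/Rn restriction mentioned above.

    #include <stdint.h>

    typedef struct { uint64_t lo, hi; } u128_pair;   /* invented type */

    static u128_pair load16_atomic(const void *p)
    {
        uint64_t lo, hi;
        uint32_t fail;

        asm volatile("0: ldxp  %[lo], %[hi], [%[addr]]\n\t"
                     "   stxp  %w[fail], %[lo], %[hi], [%[addr]]\n\t"
                     "   cbnz  %w[fail], 0b"
                     : [lo] "=&r"(lo), [hi] "=&r"(hi), [fail] "=&r"(fail)
                     : [addr] "r"(p)
                     : "memory");
        return (u128_pair){ lo, hi };
    }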
2
4
3
Addition of not and xor on 128-bit integers.
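A small usage example (assuming the QEMU tree, i.e. qemu/int128.h; values arbitrary): the new helpers behave the same whether Int128 is the native __int128 or the struct fallback.

    Int128 x = int128_make128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
    Int128 y = int128_xor(x, int128_not(x));    /* all bits set */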
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
5
Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
6
Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
7
Message-Id: <20211025122818.168890-3-frederic.petrot@univ-grenoble-alpes.fr>
8
[rth: Split out logical operations.]
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
7
---
12
include/qemu/int128.h | 20 ++++++++++++++++++++
8
tcg/aarch64/tcg-target-con-set.h | 2 +
13
1 file changed, 20 insertions(+)
9
tcg/aarch64/tcg-target.h | 11 +-
10
tcg/aarch64/tcg-target.c.inc | 179 ++++++++++++++++++++++++++++++-
11
3 files changed, 189 insertions(+), 3 deletions(-)
14
12
15
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
13
diff --git a/tcg/aarch64/tcg-target-con-set.h b/tcg/aarch64/tcg-target-con-set.h
16
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
17
--- a/include/qemu/int128.h
15
--- a/tcg/aarch64/tcg-target-con-set.h
18
+++ b/include/qemu/int128.h
16
+++ b/tcg/aarch64/tcg-target-con-set.h
19
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
17
@@ -XXX,XX +XXX,XX @@ C_O0_I2(lZ, l)
20
return a;
18
C_O0_I2(r, rA)
19
C_O0_I2(rZ, r)
20
C_O0_I2(w, r)
21
+C_O0_I3(lZ, lZ, l)
22
C_O1_I1(r, l)
23
C_O1_I1(r, r)
24
C_O1_I1(w, r)
25
@@ -XXX,XX +XXX,XX @@ C_O1_I2(w, w, wO)
26
C_O1_I2(w, w, wZ)
27
C_O1_I3(w, w, w, w)
28
C_O1_I4(r, r, rA, rZ, rZ)
29
+C_O2_I1(r, r, l)
30
C_O2_I4(r, r, rZ, rZ, rA, rMZ)
31
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/tcg/aarch64/tcg-target.h
34
+++ b/tcg/aarch64/tcg-target.h
35
@@ -XXX,XX +XXX,XX @@ extern bool have_lse2;
36
#define TCG_TARGET_HAS_muluh_i64 1
37
#define TCG_TARGET_HAS_mulsh_i64 1
38
39
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
40
+/*
41
+ * Without FEAT_LSE2, we must use LDXP+STXP to implement atomic 128-bit load,
42
+ * which requires writable pages. We must defer to the helper for user-only,
43
+ * but in system mode all ram is writable for the host.
44
+ */
45
+#ifdef CONFIG_USER_ONLY
46
+#define TCG_TARGET_HAS_qemu_ldst_i128 have_lse2
47
+#else
48
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
49
+#endif
50
51
#define TCG_TARGET_HAS_v64 1
52
#define TCG_TARGET_HAS_v128 1
53
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
54
index XXXXXXX..XXXXXXX 100644
55
--- a/tcg/aarch64/tcg-target.c.inc
56
+++ b/tcg/aarch64/tcg-target.c.inc
57
@@ -XXX,XX +XXX,XX @@ bool have_lse;
58
bool have_lse2;
59
60
#define TCG_REG_TMP0 TCG_REG_X30
61
+#define TCG_REG_TMP1 TCG_REG_X17
62
#define TCG_VEC_TMP0 TCG_REG_V31
63
64
#ifndef CONFIG_SOFTMMU
65
@@ -XXX,XX +XXX,XX @@ typedef enum {
66
I3305_LDR_v64 = 0x5c000000,
67
I3305_LDR_v128 = 0x9c000000,
68
69
+ /* Load/store exclusive. */
70
+ I3306_LDXP = 0xc8600000,
71
+ I3306_STXP = 0xc8200000,
72
+
73
/* Load/store register. Described here as 3.3.12, but the helper
74
that emits them can transform to 3.3.10 or 3.3.13. */
75
I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
76
@@ -XXX,XX +XXX,XX @@ typedef enum {
77
I3406_ADR = 0x10000000,
78
I3406_ADRP = 0x90000000,
79
80
+ /* Add/subtract extended register instructions. */
81
+ I3501_ADD = 0x0b200000,
82
+
83
/* Add/subtract shifted register instructions (without a shift). */
84
I3502_ADD = 0x0b000000,
85
I3502_ADDS = 0x2b000000,
86
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn,
87
tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
21
}
88
}
22
89
23
+static inline Int128 int128_not(Int128 a)
90
+static void tcg_out_insn_3306(TCGContext *s, AArch64Insn insn, TCGReg rs,
24
+{
91
+ TCGReg rt, TCGReg rt2, TCGReg rn)
25
+ return ~a;
92
+{
26
+}
93
+ tcg_out32(s, insn | rs << 16 | rt2 << 10 | rn << 5 | rt);
27
+
94
+}
28
static inline Int128 int128_and(Int128 a, Int128 b)
95
+
96
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
97
TCGReg rt, int imm19)
29
{
98
{
30
return a & b;
99
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
31
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
100
tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
32
return a | b;
33
}
101
}
34
102
35
+static inline Int128 int128_xor(Int128 a, Int128 b)
103
+static inline void tcg_out_insn_3501(TCGContext *s, AArch64Insn insn,
36
+{
104
+ TCGType sf, TCGReg rd, TCGReg rn,
37
+ return a ^ b;
105
+ TCGReg rm, int opt, int imm3)
38
+}
106
+{
39
+
107
+ tcg_out32(s, insn | sf << 31 | rm << 16 | opt << 13 |
40
static inline Int128 int128_rshift(Int128 a, int n)
108
+ imm3 << 10 | rn << 5 | rd);
41
{
109
+}
42
return a >> n;
110
+
43
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
111
/* This function is for both 3.5.2 (Add/Subtract shifted register), for
44
return int128_make128(a, (a < 0) ? -1 : 0);
112
the rare occasion when we actually want to supply a shift amount. */
113
static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
114
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
115
TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
116
TCGLabelQemuLdst *ldst = NULL;
117
MemOp opc = get_memop(oi);
118
+ MemOp s_bits = opc & MO_SIZE;
119
unsigned a_mask;
120
121
h->aa = atom_and_align_for_opc(s, opc,
122
have_lse2 ? MO_ATOM_WITHIN16
123
: MO_ATOM_IFALIGN,
124
- false);
125
+ s_bits == MO_128);
126
a_mask = (1 << h->aa.align) - 1;
127
128
#ifdef CONFIG_SOFTMMU
129
- unsigned s_bits = opc & MO_SIZE;
130
unsigned s_mask = (1u << s_bits) - 1;
131
unsigned mem_index = get_mmuidx(oi);
132
TCGReg x3;
133
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
134
}
45
}
135
}
46
136
47
+static inline Int128 int128_not(Int128 a)
137
+static TCGLabelQemuLdst *
48
+{
138
+prepare_host_addr_base_only(TCGContext *s, HostAddress *h, TCGReg addr_reg,
49
+ return int128_make128(~a.lo, ~a.hi);
139
+ MemOpIdx oi, bool is_ld)
50
+}
140
+{
51
+
141
+ TCGLabelQemuLdst *ldst;
52
static inline Int128 int128_and(Int128 a, Int128 b)
142
+
53
{
143
+ ldst = prepare_host_addr(s, h, addr_reg, oi, true);
54
return int128_make128(a.lo & b.lo, a.hi & b.hi);
144
+
55
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
145
+ /* Compose the final address, as LDP/STP have no indexing. */
56
return int128_make128(a.lo | b.lo, a.hi | b.hi);
146
+ if (h->index != TCG_REG_XZR) {
147
+ tcg_out_insn(s, 3501, ADD, TCG_TYPE_I64, TCG_REG_TMP0,
148
+ h->base, h->index,
149
+ h->index_ext == TCG_TYPE_I32 ? MO_32 : MO_64, 0);
150
+ h->base = TCG_REG_TMP0;
151
+ h->index = TCG_REG_XZR;
152
+ h->index_ext = TCG_TYPE_I64;
153
+ }
154
+
155
+ return ldst;
156
+}
157
+
158
+static void tcg_out_qemu_ld128(TCGContext *s, TCGReg datalo, TCGReg datahi,
159
+ TCGReg addr_reg, MemOpIdx oi)
160
+{
161
+ TCGLabelQemuLdst *ldst;
162
+ HostAddress h;
163
+
164
+ ldst = prepare_host_addr_base_only(s, &h, addr_reg, oi, true);
165
+
166
+ if (h.aa.atom < MO_128 || have_lse2) {
167
+ tcg_out_insn(s, 3314, LDP, datalo, datahi, h.base, 0, 0, 0);
168
+ } else {
169
+ TCGLabel *l0, *l1 = NULL;
170
+
171
+ /*
172
+ * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
173
+ * 1: ldxp lo,hi,[addr]
174
+ * stxp tmp1,lo,hi,[addr]
175
+ * cbnz tmp1, 1b
176
+ *
177
+ * If we have already checked for 16-byte alignment, that's all
178
+ * we need. Otherwise we have determined that misaligned atomicity
179
+ * may be handled with two 8-byte loads.
180
+ */
181
+ if (h.aa.align < MO_128) {
182
+ /*
183
+ * TODO: align should be MO_64, so we only need test bit 3,
184
+ * which means we could use TBNZ instead of AND+CBNE.
185
+ */
186
+ l1 = gen_new_label();
187
+ tcg_out_logicali(s, I3404_ANDI, 0, TCG_REG_TMP1, addr_reg, 15);
188
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE,
189
+ TCG_REG_TMP1, 0, 1, l1);
190
+ }
191
+
192
+ l0 = gen_new_label();
193
+ tcg_out_label(s, l0);
194
+
195
+ tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR, datalo, datahi, h.base);
196
+ tcg_out_insn(s, 3306, STXP, TCG_REG_TMP1, datalo, datahi, h.base);
197
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE, TCG_REG_TMP1, 0, 1, l0);
198
+
199
+ if (l1) {
200
+ TCGLabel *l2 = gen_new_label();
201
+ tcg_out_goto_label(s, l2);
202
+
203
+ tcg_out_label(s, l1);
204
+ tcg_out_insn(s, 3314, LDP, datalo, datahi, h.base, 0, 0, 0);
205
+
206
+ tcg_out_label(s, l2);
207
+ }
208
+ }
209
+
210
+ if (ldst) {
211
+ ldst->type = TCG_TYPE_I128;
212
+ ldst->datalo_reg = datalo;
213
+ ldst->datahi_reg = datahi;
214
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
215
+ }
216
+}
217
+
218
+static void tcg_out_qemu_st128(TCGContext *s, TCGReg datalo, TCGReg datahi,
219
+ TCGReg addr_reg, MemOpIdx oi)
220
+{
221
+ TCGLabelQemuLdst *ldst;
222
+ HostAddress h;
223
+
224
+ ldst = prepare_host_addr_base_only(s, &h, addr_reg, oi, false);
225
+
226
+ if (h.aa.atom < MO_128 || have_lse2) {
227
+ tcg_out_insn(s, 3314, STP, datalo, datahi, h.base, 0, 0, 0);
228
+ } else {
229
+ TCGLabel *l0, *l1 = NULL;
230
+
231
+ /*
232
+ * 16-byte atomicity without LSE2 requires LDXP+STXP loop:
233
+ * 1: ldxp xzr,tmp1,[addr]
234
+ * stxp tmp1,lo,hi,[addr]
235
+ * cbnz tmp1, 1b
236
+ *
237
+ * If we have already checked for 16-byte alignment, that's all
238
+ * we need. Otherwise we have determined that misaligned atomicity
239
+ * may be handled with two 8-byte stores.
240
+ */
241
+ if (h.aa.align < MO_128) {
242
+ /*
243
+ * TODO: align should be MO_64, so we only need test bit 3,
244
+ * which means we could use TBNZ instead of AND+CBNE.
245
+ */
246
+ l1 = gen_new_label();
247
+ tcg_out_logicali(s, I3404_ANDI, 0, TCG_REG_TMP1, addr_reg, 15);
248
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE,
249
+ TCG_REG_TMP1, 0, 1, l1);
250
+ }
251
+
252
+ l0 = gen_new_label();
253
+ tcg_out_label(s, l0);
254
+
255
+ tcg_out_insn(s, 3306, LDXP, TCG_REG_XZR,
256
+ TCG_REG_XZR, TCG_REG_TMP1, h.base);
257
+ tcg_out_insn(s, 3306, STXP, TCG_REG_TMP1, datalo, datahi, h.base);
258
+ tcg_out_brcond(s, TCG_TYPE_I32, TCG_COND_NE, TCG_REG_TMP1, 0, 1, l0);
259
+
260
+ if (l1) {
261
+ TCGLabel *l2 = gen_new_label();
262
+ tcg_out_goto_label(s, l2);
263
+
264
+ tcg_out_label(s, l1);
265
+ tcg_out_insn(s, 3314, STP, datalo, datahi, h.base, 0, 0, 0);
266
+
267
+ tcg_out_label(s, l2);
268
+ }
269
+ }
270
+
271
+ if (ldst) {
272
+ ldst->type = TCG_TYPE_I128;
273
+ ldst->datalo_reg = datalo;
274
+ ldst->datahi_reg = datahi;
275
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
276
+ }
277
+}
278
+
279
static const tcg_insn_unit *tb_ret_addr;
280
281
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
282
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
283
case INDEX_op_qemu_st_i64:
284
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
285
break;
286
+ case INDEX_op_qemu_ld_i128:
287
+ tcg_out_qemu_ld128(s, a0, a1, a2, args[3]);
288
+ break;
289
+ case INDEX_op_qemu_st_i128:
290
+ tcg_out_qemu_st128(s, REG0(0), REG0(1), a2, args[3]);
291
+ break;
292
293
case INDEX_op_bswap64_i64:
294
tcg_out_rev(s, TCG_TYPE_I64, MO_64, a0, a1);
295
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
296
case INDEX_op_qemu_ld_i32:
297
case INDEX_op_qemu_ld_i64:
298
return C_O1_I1(r, l);
299
+ case INDEX_op_qemu_ld_i128:
300
+ return C_O2_I1(r, r, l);
301
case INDEX_op_qemu_st_i32:
302
case INDEX_op_qemu_st_i64:
303
return C_O0_I2(lZ, l);
304
+ case INDEX_op_qemu_st_i128:
305
+ return C_O0_I3(lZ, lZ, l);
306
307
case INDEX_op_deposit_i32:
308
case INDEX_op_deposit_i64:
309
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
310
tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
311
tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
312
tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
313
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
314
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP0);
57
}
315
}
58
316
59
+static inline Int128 int128_xor(Int128 a, Int128 b)
60
+{
61
+ return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi);
62
+}
63
+
64
static inline Int128 int128_rshift(Int128 a, int n)
65
{
66
int64_t h;
--
2.25.1
--
2.34.1
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Use LQ/STQ when ISA v2.07 is available and 16-byte atomicity is required.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
Note that these instructions do not require 16-byte alignment.
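The new 'o' constraint added below relies on 0xAAAAAAAAu selecting exactly the odd-numbered GPRs, since LQ/STQ operate on an even/odd register pair (see the datalo & 1 and datahi == datalo - 1 asserts). A tiny standalone check, invented purely for illustration:

    #include <stdio.h>

    int main(void)
    {
        unsigned mask = 0xAAAAAAAAu;   /* bit n set <=> register rN usable */

        for (int r = 0; r < 32; r++) {
            if (mask & (1u << r)) {
                printf("r%d ", r);     /* prints r1 r3 r5 ... r31 */
            }
        }
        printf("\n");
        return 0;
    }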
3
4
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
tcg/optimize.c | 53 +++++++++++++++++++++++++++++---------------------
7
tcg/ppc/tcg-target-con-set.h | 2 +
6
1 file changed, 31 insertions(+), 22 deletions(-)
8
tcg/ppc/tcg-target-con-str.h | 1 +
9
tcg/ppc/tcg-target.h | 3 +-
10
tcg/ppc/tcg-target.c.inc | 115 +++++++++++++++++++++++++++++++----
11
4 files changed, 108 insertions(+), 13 deletions(-)
7
12
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
9
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
15
--- a/tcg/ppc/tcg-target-con-set.h
11
+++ b/tcg/optimize.c
16
+++ b/tcg/ppc/tcg-target-con-set.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
17
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r)
13
return fold_const2(ctx, op);
18
C_O0_I2(r, ri)
19
C_O0_I2(v, r)
20
C_O0_I3(r, r, r)
21
+C_O0_I3(o, m, r)
22
C_O0_I4(r, r, ri, ri)
23
C_O0_I4(r, r, r, r)
24
C_O1_I1(r, r)
25
@@ -XXX,XX +XXX,XX @@ C_O1_I3(v, v, v, v)
26
C_O1_I4(r, r, ri, rZ, rZ)
27
C_O1_I4(r, r, r, ri, ri)
28
C_O2_I1(r, r, r)
29
+C_O2_I1(o, m, r)
30
C_O2_I2(r, r, r, r)
31
C_O2_I4(r, r, rI, rZM, r, r)
32
C_O2_I4(r, r, r, r, rI, rZM)
33
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/tcg/ppc/tcg-target-con-str.h
36
+++ b/tcg/ppc/tcg-target-con-str.h
37
@@ -XXX,XX +XXX,XX @@
38
* REGS(letter, register_mask)
39
*/
40
REGS('r', ALL_GENERAL_REGS)
41
+REGS('o', ALL_GENERAL_REGS & 0xAAAAAAAAu) /* odd registers */
42
REGS('v', ALL_VECTOR_REGS)
43
44
/*
45
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/ppc/tcg-target.h
48
+++ b/tcg/ppc/tcg-target.h
49
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
50
#define TCG_TARGET_HAS_mulsh_i64 1
51
#endif
52
53
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
54
+#define TCG_TARGET_HAS_qemu_ldst_i128 \
55
+ (TCG_TARGET_REG_BITS == 64 && have_isa_2_07)
56
57
/*
58
* While technically Altivec could support V64, it has no 64-bit store
59
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
60
index XXXXXXX..XXXXXXX 100644
61
--- a/tcg/ppc/tcg-target.c.inc
62
+++ b/tcg/ppc/tcg-target.c.inc
63
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
64
65
#define B OPCD( 18)
66
#define BC OPCD( 16)
67
+
68
#define LBZ OPCD( 34)
69
#define LHZ OPCD( 40)
70
#define LHA OPCD( 42)
71
#define LWZ OPCD( 32)
72
#define LWZUX XO31( 55)
73
-#define STB OPCD( 38)
74
-#define STH OPCD( 44)
75
-#define STW OPCD( 36)
76
-
77
-#define STD XO62( 0)
78
-#define STDU XO62( 1)
79
-#define STDX XO31(149)
80
-
81
#define LD XO58( 0)
82
#define LDX XO31( 21)
83
#define LDU XO58( 1)
84
#define LDUX XO31( 53)
85
#define LWA XO58( 2)
86
#define LWAX XO31(341)
87
+#define LQ OPCD( 56)
88
+
89
+#define STB OPCD( 38)
90
+#define STH OPCD( 44)
91
+#define STW OPCD( 36)
92
+#define STD XO62( 0)
93
+#define STDU XO62( 1)
94
+#define STDX XO31(149)
95
+#define STQ XO62( 2)
96
97
#define ADDIC OPCD( 12)
98
#define ADDI OPCD( 14)
99
@@ -XXX,XX +XXX,XX @@ typedef struct {
100
101
bool tcg_target_has_memory_bswap(MemOp memop)
102
{
103
- return true;
104
+ TCGAtomAlign aa;
105
+
106
+ if ((memop & MO_SIZE) <= MO_64) {
107
+ return true;
108
+ }
109
+
110
+ /*
111
+ * Reject 16-byte memop with 16-byte atomicity,
112
+ * but do allow a pair of 64-bit operations.
113
+ */
114
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
115
+ return aa.atom <= MO_64;
14
}
116
}
15
117
16
+static bool fold_dup(OptContext *ctx, TCGOp *op)
118
/*
119
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
120
{
121
TCGLabelQemuLdst *ldst = NULL;
122
MemOp opc = get_memop(oi);
123
- MemOp a_bits;
124
+ MemOp a_bits, s_bits;
125
126
/*
127
* Book II, Section 1.4, Single-Copy Atomicity, specifies:
128
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
129
* As of 3.0, "the non-atomic access is performed as described in
130
* the corresponding list", which matches MO_ATOM_SUBALIGN.
131
*/
132
+ s_bits = opc & MO_SIZE;
133
h->aa = atom_and_align_for_opc(s, opc,
134
have_isa_3_00 ? MO_ATOM_SUBALIGN
135
: MO_ATOM_IFALIGN,
136
- false);
137
+ s_bits == MO_128);
138
a_bits = h->aa.align;
139
140
#ifdef CONFIG_SOFTMMU
141
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
142
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
143
int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
144
int table_off = fast_off + offsetof(CPUTLBDescFast, table);
145
- unsigned s_bits = opc & MO_SIZE;
146
147
ldst = new_ldst_label(s);
148
ldst->is_ld = is_ld;
149
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
150
}
151
}
152
153
+static TCGLabelQemuLdst *
154
+prepare_host_addr_index_only(TCGContext *s, HostAddress *h, TCGReg addr_reg,
155
+ MemOpIdx oi, bool is_ld)
17
+{
156
+{
18
+ if (arg_is_const(op->args[1])) {
157
+ TCGLabelQemuLdst *ldst;
19
+ uint64_t t = arg_info(op->args[1])->val;
158
+
20
+ t = dup_const(TCGOP_VECE(op), t);
159
+ ldst = prepare_host_addr(s, h, addr_reg, -1, oi, true);
21
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
160
+
22
+ }
161
+ /* Compose the final address, as LQ/STQ have no indexing. */
23
+ return false;
162
+ if (h->base != 0) {
163
+ tcg_out32(s, ADD | TAB(TCG_REG_TMP1, h->base, h->index));
164
+ h->index = TCG_REG_TMP1;
165
+ h->base = 0;
166
+ }
167
+
168
+ return ldst;
24
+}
169
+}
25
+
170
+
26
+static bool fold_dup2(OptContext *ctx, TCGOp *op)
171
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
172
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
27
+{
173
+{
28
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
174
+ TCGLabelQemuLdst *ldst;
29
+ uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
175
+ HostAddress h;
30
+ arg_info(op->args[2])->val);
176
+ bool need_bswap;
31
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
177
+ uint32_t insn;
32
+ }
178
+
33
+
179
+ ldst = prepare_host_addr_index_only(s, &h, addr_reg, oi, is_ld);
34
+ if (args_are_copies(op->args[1], op->args[2])) {
180
+ need_bswap = get_memop(oi) & MO_BSWAP;
35
+ op->opc = INDEX_op_dup_vec;
181
+
36
+ TCGOP_VECE(op) = MO_32;
182
+ if (h.aa.atom == MO_128) {
37
+ }
183
+ tcg_debug_assert(!need_bswap);
38
+ return false;
184
+ tcg_debug_assert(datalo & 1);
185
+ tcg_debug_assert(datahi == datalo - 1);
186
+ insn = is_ld ? LQ : STQ;
187
+ tcg_out32(s, insn | TAI(datahi, h.index, 0));
188
+ } else {
189
+ TCGReg d1, d2;
190
+
191
+ if (HOST_BIG_ENDIAN ^ need_bswap) {
192
+ d1 = datahi, d2 = datalo;
193
+ } else {
194
+ d1 = datalo, d2 = datahi;
195
+ }
196
+
197
+ if (need_bswap) {
198
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 8);
199
+ insn = is_ld ? LDBRX : STDBRX;
200
+ tcg_out32(s, insn | TAB(d1, 0, h.index));
201
+ tcg_out32(s, insn | TAB(d2, h.index, TCG_REG_R0));
202
+ } else {
203
+ insn = is_ld ? LD : STD;
204
+ tcg_out32(s, insn | TAI(d1, h.index, 0));
205
+ tcg_out32(s, insn | TAI(d2, h.index, 8));
206
+ }
207
+ }
208
+
209
+ if (ldst) {
210
+ ldst->type = TCG_TYPE_I128;
211
+ ldst->datalo_reg = datalo;
212
+ ldst->datahi_reg = datahi;
213
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
214
+ }
39
+}
215
+}
40
+
216
+
41
static bool fold_eqv(OptContext *ctx, TCGOp *op)
217
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
42
{
218
{
43
return fold_const2(ctx, op);
219
int i;
44
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
220
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
45
done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
221
args[4], TCG_TYPE_I64);
46
break;
222
}
47
223
break;
48
- case INDEX_op_dup_vec:
224
+ case INDEX_op_qemu_ld_i128:
49
- if (arg_is_const(op->args[1])) {
225
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
50
- tmp = arg_info(op->args[1])->val;
226
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
51
- tmp = dup_const(TCGOP_VECE(op), tmp);
227
+ break;
52
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
228
+
53
- continue;
229
case INDEX_op_qemu_st_i32:
54
- }
230
if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
55
- break;
231
tcg_out_qemu_st(s, args[0], -1, args[1], -1,
56
-
232
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
57
- case INDEX_op_dup2_vec:
233
args[4], TCG_TYPE_I64);
58
- assert(TCG_TARGET_REG_BITS == 32);
234
}
59
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
235
break;
60
- tcg_opt_gen_movi(&ctx, op, op->args[0],
236
+ case INDEX_op_qemu_st_i128:
61
- deposit64(arg_info(op->args[1])->val, 32, 32,
237
+ tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
62
- arg_info(op->args[2])->val));
238
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
63
- continue;
239
+ break;
64
- } else if (args_are_copies(op->args[1], op->args[2])) {
240
65
- op->opc = INDEX_op_dup_vec;
241
case INDEX_op_setcond_i32:
66
- TCGOP_VECE(op) = MO_32;
242
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
67
- }
243
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
68
- break;
244
: TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
69
-
245
: C_O0_I4(r, r, r, r));
70
default:
246
71
break;
247
+ case INDEX_op_qemu_ld_i128:
72
248
+ return C_O2_I1(o, m, r);
73
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
249
+ case INDEX_op_qemu_st_i128:
74
CASE_OP_32_64(divu):
250
+ return C_O0_I3(o, m, r);
75
done = fold_divide(&ctx, op);
251
+
76
break;
252
case INDEX_op_add_vec:
77
+ case INDEX_op_dup_vec:
253
case INDEX_op_sub_vec:
78
+ done = fold_dup(&ctx, op);
254
case INDEX_op_mul_vec:
79
+ break;
80
+ case INDEX_op_dup2_vec:
81
+ done = fold_dup2(&ctx, op);
82
+ break;
83
CASE_OP_32_64(eqv):
84
done = fold_eqv(&ctx, op);
85
break;
--
2.25.1
--
2.34.1
1
Even though there is only one user, place this more complex
1
Use LPQ/STPQ when 16-byte atomicity is required.
2
conversion into its own helper.
2
Note that these instructions require 16-byte alignment.
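As a plain-C sketch of the run-time decision the generated code makes (illustrative only, not backend code): the single LPQ/STPQ is used when full 16-byte atomicity is demanded and the address turns out to be 16-byte aligned; otherwise a pair of 8-byte accesses is emitted, which under MO_ATOM_IFALIGN is all that is owed for a misaligned access.

    #include <stdbool.h>
    #include <stdint.h>

    static bool use_lpq_stpq(uintptr_t addr, bool need_16byte_atomicity)
    {
        /* The emitted code performs this test with TMLL + BRC. */
        return need_16byte_atomicity && (addr & 15) == 0;
    }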
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/optimize.c | 89 ++++++++++++++++++++++++++------------------------
7
tcg/s390x/tcg-target-con-set.h | 2 +
8
1 file changed, 47 insertions(+), 42 deletions(-)
8
tcg/s390x/tcg-target.h | 2 +-
9
tcg/s390x/tcg-target.c.inc | 103 ++++++++++++++++++++++++++++++++-
10
3 files changed, 103 insertions(+), 4 deletions(-)
9
11
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
11
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
14
--- a/tcg/s390x/tcg-target-con-set.h
13
+++ b/tcg/optimize.c
15
+++ b/tcg/s390x/tcg-target-con-set.h
14
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ C_O0_I2(r, r)
15
17
C_O0_I2(r, ri)
16
static bool fold_neg(OptContext *ctx, TCGOp *op)
18
C_O0_I2(r, rA)
19
C_O0_I2(v, r)
20
+C_O0_I3(o, m, r)
21
C_O1_I1(r, r)
22
C_O1_I1(v, r)
23
C_O1_I1(v, v)
24
@@ -XXX,XX +XXX,XX @@ C_O1_I2(v, v, v)
25
C_O1_I3(v, v, v, v)
26
C_O1_I4(r, r, ri, rI, r)
27
C_O1_I4(r, r, rA, rI, r)
28
+C_O2_I1(o, m, r)
29
C_O2_I2(o, m, 0, r)
30
C_O2_I2(o, m, r, r)
31
C_O2_I3(o, m, 0, 1, r)
32
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
33
index XXXXXXX..XXXXXXX 100644
34
--- a/tcg/s390x/tcg-target.h
35
+++ b/tcg/s390x/tcg-target.h
36
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
37
#define TCG_TARGET_HAS_muluh_i64 0
38
#define TCG_TARGET_HAS_mulsh_i64 0
39
40
-#define TCG_TARGET_HAS_qemu_ldst_i128 0
41
+#define TCG_TARGET_HAS_qemu_ldst_i128 1
42
43
#define TCG_TARGET_HAS_v64 HAVE_FACILITY(VECTOR)
44
#define TCG_TARGET_HAS_v128 HAVE_FACILITY(VECTOR)
45
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
46
index XXXXXXX..XXXXXXX 100644
47
--- a/tcg/s390x/tcg-target.c.inc
48
+++ b/tcg/s390x/tcg-target.c.inc
49
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
50
RXY_LLGF = 0xe316,
51
RXY_LLGH = 0xe391,
52
RXY_LMG = 0xeb04,
53
+ RXY_LPQ = 0xe38f,
54
RXY_LRV = 0xe31e,
55
RXY_LRVG = 0xe30f,
56
RXY_LRVH = 0xe31f,
57
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
58
RXY_STG = 0xe324,
59
RXY_STHY = 0xe370,
60
RXY_STMG = 0xeb24,
61
+ RXY_STPQ = 0xe38e,
62
RXY_STRV = 0xe33e,
63
RXY_STRVG = 0xe32f,
64
RXY_STRVH = 0xe33f,
65
@@ -XXX,XX +XXX,XX @@ typedef struct {
66
67
bool tcg_target_has_memory_bswap(MemOp memop)
17
{
68
{
18
- return fold_const1(ctx, op);
69
- return true;
19
+ if (fold_const1(ctx, op)) {
70
+ TCGAtomAlign aa;
71
+
72
+ if ((memop & MO_SIZE) <= MO_64) {
20
+ return true;
73
+ return true;
21
+ }
74
+ }
75
+
22
+ /*
76
+ /*
23
+ * Because of fold_sub_to_neg, we want to always return true,
77
+ * Reject 16-byte memop with 16-byte atomicity,
24
+ * via finish_folding.
78
+ * but do allow a pair of 64-bit operations.
25
+ */
79
+ */
26
+ finish_folding(ctx, op);
80
+ aa = atom_and_align_for_opc(tcg_ctx, memop, MO_ATOM_IFALIGN, true);
27
+ return true;
81
+ return aa.atom <= MO_64;
28
}
82
}
29
83
30
static bool fold_nor(OptContext *ctx, TCGOp *op)
84
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
31
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
85
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
32
return fold_const2(ctx, op);
86
{
87
TCGLabelQemuLdst *ldst = NULL;
88
MemOp opc = get_memop(oi);
89
+ MemOp s_bits = opc & MO_SIZE;
90
unsigned a_mask;
91
92
- h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false);
93
+ h->aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, s_bits == MO_128);
94
a_mask = (1 << h->aa.align) - 1;
95
96
#ifdef CONFIG_SOFTMMU
97
- unsigned s_bits = opc & MO_SIZE;
98
unsigned s_mask = (1 << s_bits) - 1;
99
int mem_index = get_mmuidx(oi);
100
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
101
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
102
}
33
}
103
}
34
104
35
+static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
105
+static void tcg_out_qemu_ldst_i128(TCGContext *s, TCGReg datalo, TCGReg datahi,
106
+ TCGReg addr_reg, MemOpIdx oi, bool is_ld)
36
+{
107
+{
37
+ TCGOpcode neg_op;
108
+ TCGLabel *l1 = NULL, *l2 = NULL;
38
+ bool have_neg;
109
+ TCGLabelQemuLdst *ldst;
39
+
110
+ HostAddress h;
40
+ if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
111
+ bool need_bswap;
41
+ return false;
112
+ bool use_pair;
42
+ }
113
+ S390Opcode insn;
43
+
114
+
44
+ switch (ctx->type) {
115
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, is_ld);
45
+ case TCG_TYPE_I32:
116
+
46
+ neg_op = INDEX_op_neg_i32;
117
+ use_pair = h.aa.atom < MO_128;
47
+ have_neg = TCG_TARGET_HAS_neg_i32;
118
+ need_bswap = get_memop(oi) & MO_BSWAP;
119
+
120
+ if (!use_pair) {
121
+ /*
122
+ * Atomicity requires we use LPQ. If we've already checked for
123
+ * 16-byte alignment, that's all we need. If we arrive with
124
+ * lesser alignment, we have determined that less than 16-byte
125
+ * alignment can be satisfied with two 8-byte loads.
126
+ */
127
+ if (h.aa.align < MO_128) {
128
+ use_pair = true;
129
+ l1 = gen_new_label();
130
+ l2 = gen_new_label();
131
+
132
+ tcg_out_insn(s, RI, TMLL, addr_reg, 15);
133
+ tgen_branch(s, 7, l1); /* CC in {1,2,3} */
134
+ }
135
+
136
+ tcg_debug_assert(!need_bswap);
137
+ tcg_debug_assert(datalo & 1);
138
+ tcg_debug_assert(datahi == datalo - 1);
139
+ insn = is_ld ? RXY_LPQ : RXY_STPQ;
140
+ tcg_out_insn_RXY(s, insn, datahi, h.base, h.index, h.disp);
141
+
142
+ if (use_pair) {
143
+ tgen_branch(s, S390_CC_ALWAYS, l2);
144
+ tcg_out_label(s, l1);
145
+ }
146
+ }
147
+ if (use_pair) {
148
+ TCGReg d1, d2;
149
+
150
+ if (need_bswap) {
151
+ d1 = datalo, d2 = datahi;
152
+ insn = is_ld ? RXY_LRVG : RXY_STRVG;
153
+ } else {
154
+ d1 = datahi, d2 = datalo;
155
+ insn = is_ld ? RXY_LG : RXY_STG;
156
+ }
157
+
158
+ if (h.base == d1 || h.index == d1) {
159
+ tcg_out_insn(s, RXY, LAY, TCG_TMP0, h.base, h.index, h.disp);
160
+ h.base = TCG_TMP0;
161
+ h.index = TCG_REG_NONE;
162
+ h.disp = 0;
163
+ }
164
+ tcg_out_insn_RXY(s, insn, d1, h.base, h.index, h.disp);
165
+ tcg_out_insn_RXY(s, insn, d2, h.base, h.index, h.disp + 8);
166
+ }
167
+ if (l2) {
168
+ tcg_out_label(s, l2);
169
+ }
170
+
171
+ if (ldst) {
172
+ ldst->type = TCG_TYPE_I128;
173
+ ldst->datalo_reg = datalo;
174
+ ldst->datahi_reg = datahi;
175
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
176
+ }
177
+}
178
+
179
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
180
{
181
/* Reuse the zeroing that exists for goto_ptr. */
182
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
183
case INDEX_op_qemu_st_i64:
184
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
185
break;
186
+ case INDEX_op_qemu_ld_i128:
187
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
48
+ break;
188
+ break;
49
+ case TCG_TYPE_I64:
189
+ case INDEX_op_qemu_st_i128:
50
+ neg_op = INDEX_op_neg_i64;
190
+ tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
51
+ have_neg = TCG_TARGET_HAS_neg_i64;
52
+ break;
191
+ break;
53
+ case TCG_TYPE_V64:
192
54
+ case TCG_TYPE_V128:
193
case INDEX_op_ld16s_i64:
55
+ case TCG_TYPE_V256:
194
tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
56
+ neg_op = INDEX_op_neg_vec;
195
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
57
+ have_neg = (TCG_TARGET_HAS_neg_vec &&
196
case INDEX_op_qemu_st_i64:
58
+ tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
197
case INDEX_op_qemu_st_i32:
59
+ break;
198
return C_O0_I2(r, r);
60
+ default:
199
+ case INDEX_op_qemu_ld_i128:
61
+ g_assert_not_reached();
200
+ return C_O2_I1(o, m, r);
62
+ }
201
+ case INDEX_op_qemu_st_i128:
63
+ if (have_neg) {
202
+ return C_O0_I3(o, m, r);
64
+ op->opc = neg_op;
203
65
+ op->args[1] = op->args[2];
204
case INDEX_op_deposit_i32:
66
+ return fold_neg(ctx, op);
205
case INDEX_op_deposit_i64:
67
+ }
68
+ return false;
69
+}
70
+
71
static bool fold_sub(OptContext *ctx, TCGOp *op)
72
{
73
if (fold_const2(ctx, op) ||
74
- fold_xx_to_i(ctx, op, 0)) {
75
+ fold_xx_to_i(ctx, op, 0) ||
76
+ fold_sub_to_neg(ctx, op)) {
77
return true;
78
}
79
return false;
80
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
81
continue;
82
}
83
break;
84
- CASE_OP_32_64_VEC(sub):
85
- {
86
- TCGOpcode neg_op;
87
- bool have_neg;
88
-
89
- if (arg_is_const(op->args[2])) {
90
- /* Proceed with possible constant folding. */
91
- break;
92
- }
93
- switch (ctx.type) {
94
- case TCG_TYPE_I32:
95
- neg_op = INDEX_op_neg_i32;
96
- have_neg = TCG_TARGET_HAS_neg_i32;
97
- break;
98
- case TCG_TYPE_I64:
99
- neg_op = INDEX_op_neg_i64;
100
- have_neg = TCG_TARGET_HAS_neg_i64;
101
- break;
102
- case TCG_TYPE_V64:
103
- case TCG_TYPE_V128:
104
- case TCG_TYPE_V256:
105
- neg_op = INDEX_op_neg_vec;
106
- have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
107
- TCGOP_VECE(op)) > 0;
108
- break;
109
- default:
110
- g_assert_not_reached();
111
- }
112
- if (!have_neg) {
113
- break;
114
- }
115
- if (arg_is_const(op->args[1])
116
- && arg_info(op->args[1])->val == 0) {
117
- op->opc = neg_op;
118
- reset_temp(op->args[0]);
119
- op->args[1] = op->args[2];
120
- continue;
121
- }
122
- }
123
- break;
124
default:
125
break;
126
}
127
--
206
--
128
2.25.1
207
2.34.1
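The optimizer hunk above folds a subtraction whose first operand is the constant zero into a negation, provided the backend implements a negate opcode for the value's type. A minimal standalone sketch of that decision, with invented types and names rather than the real TCG optimizer API, looks like this:

/*
 * Standalone sketch of the sub -> neg rewrite above.
 * Types and helpers are invented for illustration only.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef enum { OP_SUB, OP_NEG } Opcode;

typedef struct {
    Opcode opc;
    bool arg1_is_const;   /* is the minuend known at translation time? */
    int64_t arg1_val;     /* its value, if so */
} Op;

/* Rewrite "sub r, 0, x" as "neg r, x" when the target can negate. */
static bool fold_sub_to_neg(Op *op, bool target_has_neg)
{
    if (!op->arg1_is_const || op->arg1_val != 0) {
        return false;     /* minuend is not the constant 0 */
    }
    if (!target_has_neg) {
        return false;     /* no negate opcode for this type */
    }
    op->opc = OP_NEG;     /* 0 - x  ==>  -x; the real pass also moves
                             args[2] into args[1] before re-folding */
    return true;
}

int main(void)
{
    Op op = { OP_SUB, true, 0 };
    printf("folded=%d opc=%d\n", fold_sub_to_neg(&op, true), op.opc);
    return 0;
}

The real pass repeats the availability check per type (TCG_TYPE_I32, TCG_TYPE_I64 and the vector types), which is what the switch on ctx->type above expresses.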
129
130
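The s390x 128-bit load/store path earlier in this block picks between one quadword access and a pair of doubleword accesses. As a plain-C paraphrase of the generated control flow for the load side, with hypothetical helper names standing in for the emitted LPQ/LG/LRVG instructions:

/*
 * Paraphrase of the emitted control flow for a 16-byte guest load.
 * load_quadword_atomic() and load_u64() are hypothetical stand-ins.
 */
#include <stdbool.h>
#include <stdint.h>

typedef unsigned __int128 u128;   /* GCC/Clang extension */

extern u128 load_quadword_atomic(const void *addr);   /* LPQ */
extern uint64_t load_u64(const void *addr);           /* LG / LRVG */

u128 qemu_ld_i128_sketch(const void *addr, bool need_16byte_atomicity)
{
    if (need_16byte_atomicity) {
        /* TMLL addr,15 plus a branch: is the address 16-byte aligned? */
        if (((uintptr_t)addr & 15) == 0) {
            return load_quadword_atomic(addr);   /* one 16-byte access */
        }
        /*
         * Misaligned at run time: as the comment in the patch notes,
         * the required atomicity can then be met with two 8-byte loads.
         */
    }
    uint64_t hi = load_u64(addr);                   /* big-endian: high half first */
    uint64_t lo = load_u64((const char *)addr + 8);
    return ((u128)hi << 64) | lo;
}

The store side mirrors this shape, using STPQ for the aligned quadword case and a pair of STG/STRVG stores otherwise.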
1
From: Luis Pires <luis.pires@eldorado.org.br>
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
3
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-Id: <20211025191154.350831-5-luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
3
---
8
tests/unit/test-div128.c | 197 +++++++++++++++++++++++++++++++++++++++
4
tcg/tcg-op-ldst.c | 1006 +++++++++++++++++++++++++++++++++++++++++++++
9
tests/unit/meson.build | 1 +
5
tcg/tcg-op.c | 974 -------------------------------------------
10
2 files changed, 198 insertions(+)
6
tcg/meson.build | 1 +
11
create mode 100644 tests/unit/test-div128.c
7
3 files changed, 1007 insertions(+), 974 deletions(-)
8
create mode 100644 tcg/tcg-op-ldst.c
12
9
13
diff --git a/tests/unit/test-div128.c b/tests/unit/test-div128.c
10
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
14
new file mode 100644
11
new file mode 100644
15
index XXXXXXX..XXXXXXX
12
index XXXXXXX..XXXXXXX
16
--- /dev/null
13
--- /dev/null
17
+++ b/tests/unit/test-div128.c
14
+++ b/tcg/tcg-op-ldst.c
18
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@
19
+/*
16
+/*
20
+ * Test 128-bit division functions
17
+ * Tiny Code Generator for QEMU
21
+ *
18
+ *
22
+ * Copyright (c) 2021 Instituto de Pesquisas Eldorado (eldorado.org.br)
19
+ * Copyright (c) 2008 Fabrice Bellard
23
+ *
20
+ *
24
+ * This library is free software; you can redistribute it and/or
21
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
25
+ * modify it under the terms of the GNU Lesser General Public
22
+ * of this software and associated documentation files (the "Software"), to deal
26
+ * License as published by the Free Software Foundation; either
23
+ * in the Software without restriction, including without limitation the rights
27
+ * version 2.1 of the License, or (at your option) any later version.
24
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
25
+ * copies of the Software, and to permit persons to whom the Software is
26
+ * furnished to do so, subject to the following conditions:
28
+ *
27
+ *
29
+ * This library is distributed in the hope that it will be useful,
28
+ * The above copyright notice and this permission notice shall be included in
30
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29
+ * all copies or substantial portions of the Software.
31
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
32
+ * Lesser General Public License for more details.
33
+ *
30
+ *
34
+ * You should have received a copy of the GNU Lesser General Public
31
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
35
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
32
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
34
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
35
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
36
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
37
+ * THE SOFTWARE.
36
+ */
38
+ */
37
+
39
+
38
+#include "qemu/osdep.h"
40
+#include "qemu/osdep.h"
39
+#include "qemu/host-utils.h"
41
+#include "exec/exec-all.h"
40
+
42
+#include "tcg/tcg.h"
41
+typedef struct {
43
+#include "tcg/tcg-temp-internal.h"
42
+ uint64_t high;
44
+#include "tcg/tcg-op.h"
43
+ uint64_t low;
45
+#include "tcg/tcg-mo.h"
44
+ uint64_t rhigh;
46
+#include "exec/plugin-gen.h"
45
+ uint64_t rlow;
47
+#include "tcg-internal.h"
46
+ uint64_t divisor;
48
+
47
+ uint64_t remainder;
49
+
48
+} test_data_unsigned;
50
+static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
49
+
51
+{
50
+typedef struct {
52
+ /* Trigger the asserts within as early as possible. */
51
+ int64_t high;
53
+ unsigned a_bits = get_alignment_bits(op);
52
+ uint64_t low;
54
+
53
+ int64_t rhigh;
55
+ /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
54
+ uint64_t rlow;
56
+ if (a_bits == (op & MO_SIZE)) {
55
+ int64_t divisor;
57
+ op = (op & ~MO_AMASK) | MO_ALIGN;
56
+ int64_t remainder;
58
+ }
57
+} test_data_signed;
59
+
58
+
60
+ switch (op & MO_SIZE) {
59
+static const test_data_unsigned test_table_unsigned[] = {
61
+ case MO_8:
60
+ /* Dividend fits in 64 bits */
62
+ op &= ~MO_BSWAP;
61
+ { 0x0000000000000000ULL, 0x0000000000000000ULL,
63
+ break;
62
+ 0x0000000000000000ULL, 0x0000000000000000ULL,
64
+ case MO_16:
63
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
65
+ break;
64
+ { 0x0000000000000000ULL, 0x0000000000000001ULL,
66
+ case MO_32:
65
+ 0x0000000000000000ULL, 0x0000000000000001ULL,
67
+ if (!is64) {
66
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
68
+ op &= ~MO_SIGN;
67
+ { 0x0000000000000000ULL, 0x0000000000000003ULL,
69
+ }
68
+ 0x0000000000000000ULL, 0x0000000000000001ULL,
70
+ break;
69
+ 0x0000000000000002ULL, 0x0000000000000001ULL},
71
+ case MO_64:
70
+ { 0x0000000000000000ULL, 0x8000000000000000ULL,
72
+ if (is64) {
71
+ 0x0000000000000000ULL, 0x8000000000000000ULL,
73
+ op &= ~MO_SIGN;
72
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
74
+ break;
73
+ { 0x0000000000000000ULL, 0xa000000000000000ULL,
75
+ }
74
+ 0x0000000000000000ULL, 0x0000000000000002ULL,
76
+ /* fall through */
75
+ 0x4000000000000000ULL, 0x2000000000000000ULL},
77
+ default:
76
+ { 0x0000000000000000ULL, 0x8000000000000000ULL,
78
+ g_assert_not_reached();
77
+ 0x0000000000000000ULL, 0x0000000000000001ULL,
79
+ }
78
+ 0x8000000000000000ULL, 0x0000000000000000ULL},
80
+ if (st) {
79
+
81
+ op &= ~MO_SIGN;
80
+ /* Dividend > 64 bits, with MSB 0 */
82
+ }
81
+ { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
83
+ return op;
82
+ 0x123456789abcdefeULL, 0xefedcba987654321ULL,
84
+}
83
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
85
+
84
+ { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
86
+static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
85
+ 0x0000000000000001ULL, 0x000000000000000dULL,
87
+ MemOp memop, TCGArg idx)
86
+ 0x123456789abcdefeULL, 0x03456789abcdf03bULL},
88
+{
87
+ { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
89
+ MemOpIdx oi = make_memop_idx(memop, idx);
88
+ 0x0123456789abcdefULL, 0xeefedcba98765432ULL,
90
+#if TARGET_LONG_BITS == 32
89
+ 0x0000000000000010ULL, 0x0000000000000001ULL},
91
+ tcg_gen_op3i_i32(opc, val, addr, oi);
90
+
92
+#else
91
+ /* Dividend > 64 bits, with MSB 1 */
93
+ if (TCG_TARGET_REG_BITS == 32) {
92
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
94
+ tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
93
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
95
+ } else {
94
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
96
+ tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
95
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
97
+ }
96
+ 0x0000000000000001ULL, 0x0000000000000000ULL,
98
+#endif
97
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
99
+}
98
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
100
+
99
+ 0x0feeddccbbaa9988ULL, 0x7766554433221100ULL,
101
+static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
100
+ 0x0000000000000010ULL, 0x000000000000000fULL},
102
+ MemOp memop, TCGArg idx)
101
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
103
+{
102
+ 0x000000000000000eULL, 0x00f0f0f0f0f0f35aULL,
104
+ MemOpIdx oi = make_memop_idx(memop, idx);
103
+ 0x123456789abcdefeULL, 0x0f8922bc55ef90c3ULL},
105
+#if TARGET_LONG_BITS == 32
104
+
106
+ if (TCG_TARGET_REG_BITS == 32) {
105
+ /**
107
+ tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
106
+ * Divisor == 64 bits, with MSB 1
108
+ } else {
107
+ * and high 64 bits of dividend >= divisor
109
+ tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
108
+ * (for testing normalization)
110
+ }
111
+#else
112
+ if (TCG_TARGET_REG_BITS == 32) {
113
+ tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
114
+ TCGV_LOW(addr), TCGV_HIGH(addr), oi);
115
+ } else {
116
+ tcg_gen_op3i_i64(opc, val, addr, oi);
117
+ }
118
+#endif
119
+}
120
+
121
+static void tcg_gen_req_mo(TCGBar type)
122
+{
123
+#ifdef TCG_GUEST_DEFAULT_MO
124
+ type &= TCG_GUEST_DEFAULT_MO;
125
+#endif
126
+ type &= ~TCG_TARGET_DEFAULT_MO;
127
+ if (type) {
128
+ tcg_gen_mb(type | TCG_BAR_SC);
129
+ }
130
+}
131
+
132
+static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
133
+{
134
+#ifdef CONFIG_PLUGIN
135
+ if (tcg_ctx->plugin_insn != NULL) {
136
+ /* Save a copy of the vaddr for use after a load. */
137
+ TCGv temp = tcg_temp_new();
138
+ tcg_gen_mov_tl(temp, vaddr);
139
+ return temp;
140
+ }
141
+#endif
142
+ return vaddr;
143
+}
144
+
145
+static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
146
+ enum qemu_plugin_mem_rw rw)
147
+{
148
+#ifdef CONFIG_PLUGIN
149
+ if (tcg_ctx->plugin_insn != NULL) {
150
+ qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
151
+ plugin_gen_empty_mem_callback(vaddr, info);
152
+ tcg_temp_free(vaddr);
153
+ }
154
+#endif
155
+}
156
+
157
+void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
158
+{
159
+ MemOp orig_memop;
160
+ MemOpIdx oi;
161
+
162
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
163
+ memop = tcg_canonicalize_memop(memop, 0, 0);
164
+ oi = make_memop_idx(memop, idx);
165
+
166
+ orig_memop = memop;
167
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
168
+ memop &= ~MO_BSWAP;
169
+ /* The bswap primitive benefits from zero-extended input. */
170
+ if ((memop & MO_SSIZE) == MO_SW) {
171
+ memop &= ~MO_SIGN;
172
+ }
173
+ }
174
+
175
+ addr = plugin_prep_mem_callbacks(addr);
176
+ gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
177
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
178
+
179
+ if ((orig_memop ^ memop) & MO_BSWAP) {
180
+ switch (orig_memop & MO_SIZE) {
181
+ case MO_16:
182
+ tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
183
+ ? TCG_BSWAP_IZ | TCG_BSWAP_OS
184
+ : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
185
+ break;
186
+ case MO_32:
187
+ tcg_gen_bswap32_i32(val, val);
188
+ break;
189
+ default:
190
+ g_assert_not_reached();
191
+ }
192
+ }
193
+}
194
+
195
+void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
196
+{
197
+ TCGv_i32 swap = NULL;
198
+ MemOpIdx oi;
199
+
200
+ tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
201
+ memop = tcg_canonicalize_memop(memop, 0, 1);
202
+ oi = make_memop_idx(memop, idx);
203
+
204
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
205
+ swap = tcg_temp_ebb_new_i32();
206
+ switch (memop & MO_SIZE) {
207
+ case MO_16:
208
+ tcg_gen_bswap16_i32(swap, val, 0);
209
+ break;
210
+ case MO_32:
211
+ tcg_gen_bswap32_i32(swap, val);
212
+ break;
213
+ default:
214
+ g_assert_not_reached();
215
+ }
216
+ val = swap;
217
+ memop &= ~MO_BSWAP;
218
+ }
219
+
220
+ addr = plugin_prep_mem_callbacks(addr);
221
+ if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
222
+ gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
223
+ } else {
224
+ gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
225
+ }
226
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
227
+
228
+ if (swap) {
229
+ tcg_temp_free_i32(swap);
230
+ }
231
+}
232
+
233
+void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
234
+{
235
+ MemOp orig_memop;
236
+ MemOpIdx oi;
237
+
238
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
239
+ tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
240
+ if (memop & MO_SIGN) {
241
+ tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
242
+ } else {
243
+ tcg_gen_movi_i32(TCGV_HIGH(val), 0);
244
+ }
245
+ return;
246
+ }
247
+
248
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
249
+ memop = tcg_canonicalize_memop(memop, 1, 0);
250
+ oi = make_memop_idx(memop, idx);
251
+
252
+ orig_memop = memop;
253
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
254
+ memop &= ~MO_BSWAP;
255
+ /* The bswap primitive benefits from zero-extended input. */
256
+ if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
257
+ memop &= ~MO_SIGN;
258
+ }
259
+ }
260
+
261
+ addr = plugin_prep_mem_callbacks(addr);
262
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
263
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
264
+
265
+ if ((orig_memop ^ memop) & MO_BSWAP) {
266
+ int flags = (orig_memop & MO_SIGN
267
+ ? TCG_BSWAP_IZ | TCG_BSWAP_OS
268
+ : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
269
+ switch (orig_memop & MO_SIZE) {
270
+ case MO_16:
271
+ tcg_gen_bswap16_i64(val, val, flags);
272
+ break;
273
+ case MO_32:
274
+ tcg_gen_bswap32_i64(val, val, flags);
275
+ break;
276
+ case MO_64:
277
+ tcg_gen_bswap64_i64(val, val);
278
+ break;
279
+ default:
280
+ g_assert_not_reached();
281
+ }
282
+ }
283
+}
284
+
285
+void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
286
+{
287
+ TCGv_i64 swap = NULL;
288
+ MemOpIdx oi;
289
+
290
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
291
+ tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
292
+ return;
293
+ }
294
+
295
+ tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
296
+ memop = tcg_canonicalize_memop(memop, 1, 1);
297
+ oi = make_memop_idx(memop, idx);
298
+
299
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
300
+ swap = tcg_temp_ebb_new_i64();
301
+ switch (memop & MO_SIZE) {
302
+ case MO_16:
303
+ tcg_gen_bswap16_i64(swap, val, 0);
304
+ break;
305
+ case MO_32:
306
+ tcg_gen_bswap32_i64(swap, val, 0);
307
+ break;
308
+ case MO_64:
309
+ tcg_gen_bswap64_i64(swap, val);
310
+ break;
311
+ default:
312
+ g_assert_not_reached();
313
+ }
314
+ val = swap;
315
+ memop &= ~MO_BSWAP;
316
+ }
317
+
318
+ addr = plugin_prep_mem_callbacks(addr);
319
+ gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
320
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
321
+
322
+ if (swap) {
323
+ tcg_temp_free_i64(swap);
324
+ }
325
+}
326
+
327
+/*
328
+ * Return true if @mop, without knowledge of the pointer alignment,
329
+ * does not require 16-byte atomicity, and it would be advantageous
330
+ * to avoid a call to a helper function.
331
+ */
332
+static bool use_two_i64_for_i128(MemOp mop)
333
+{
334
+#ifdef CONFIG_SOFTMMU
335
+ /* Two softmmu tlb lookups are larger than one function call. */
336
+ return false;
337
+#else
338
+ /*
339
+ * For user-only, two 64-bit operations may well be smaller than a call.
340
+ * Determine if that would be legal for the requested atomicity.
109
+ */
341
+ */
110
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
342
+ switch (mop & MO_ATOM_MASK) {
111
+ 0x0000000000000001ULL, 0x0000000000000000ULL,
343
+ case MO_ATOM_NONE:
112
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
344
+ case MO_ATOM_IFALIGN_PAIR:
113
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
345
+ return true;
114
+ 0x0000000000000001ULL, 0xfddbb9977553310aULL,
346
+ case MO_ATOM_IFALIGN:
115
+ 0x8000000000000001ULL, 0x78899aabbccddf05ULL},
347
+ case MO_ATOM_SUBALIGN:
116
+
348
+ case MO_ATOM_WITHIN16:
117
+ /* Dividend > 64 bits, divisor almost as big */
349
+ case MO_ATOM_WITHIN16_PAIR:
118
+ { 0x0000000000000001ULL, 0x23456789abcdef01ULL,
350
+ /* In a serialized context, no atomicity is required. */
119
+ 0x0000000000000000ULL, 0x000000000000000fULL,
351
+ return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
120
+ 0x123456789abcdefeULL, 0x123456789abcde1fULL},
352
+ default:
353
+ g_assert_not_reached();
354
+ }
355
+#endif
356
+}
357
+
358
+static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
359
+{
360
+ MemOp mop_1 = orig, mop_2;
361
+
362
+ tcg_debug_assert((orig & MO_SIZE) == MO_128);
363
+ tcg_debug_assert((orig & MO_SIGN) == 0);
364
+
365
+ /* Reduce the size to 64-bit. */
366
+ mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
367
+
368
+ /* Retain the alignment constraints of the original. */
369
+ switch (orig & MO_AMASK) {
370
+ case MO_UNALN:
371
+ case MO_ALIGN_2:
372
+ case MO_ALIGN_4:
373
+ mop_2 = mop_1;
374
+ break;
375
+ case MO_ALIGN_8:
376
+ /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
377
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
378
+ mop_2 = mop_1;
379
+ break;
380
+ case MO_ALIGN:
381
+ /* Second has 8-byte alignment; first has 16-byte alignment. */
382
+ mop_2 = mop_1;
383
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
384
+ break;
385
+ case MO_ALIGN_16:
386
+ case MO_ALIGN_32:
387
+ case MO_ALIGN_64:
388
+ /* Second has 8-byte alignment; first retains original. */
389
+ mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
390
+ break;
391
+ default:
392
+ g_assert_not_reached();
393
+ }
394
+
395
+ /* Use a memory ordering implemented by the host. */
396
+ if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
397
+ mop_1 &= ~MO_BSWAP;
398
+ mop_2 &= ~MO_BSWAP;
399
+ }
400
+
401
+ ret[0] = mop_1;
402
+ ret[1] = mop_2;
403
+}
404
+
405
+#if TARGET_LONG_BITS == 64
406
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
407
+#else
408
+#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
409
+#endif
410
+
411
+void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
412
+{
413
+ const MemOpIdx oi = make_memop_idx(memop, idx);
414
+
415
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
416
+ tcg_debug_assert((memop & MO_SIGN) == 0);
417
+
418
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
419
+ addr = plugin_prep_mem_callbacks(addr);
420
+
421
+ /* TODO: For now, force 32-bit hosts to use the helper. */
422
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
423
+ TCGv_i64 lo, hi;
424
+ TCGArg addr_arg;
425
+ MemOpIdx adj_oi;
426
+ bool need_bswap = false;
427
+
428
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
429
+ lo = TCGV128_HIGH(val);
430
+ hi = TCGV128_LOW(val);
431
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
432
+ need_bswap = true;
433
+ } else {
434
+ lo = TCGV128_LOW(val);
435
+ hi = TCGV128_HIGH(val);
436
+ adj_oi = oi;
437
+ }
438
+
439
+#if TARGET_LONG_BITS == 32
440
+ addr_arg = tcgv_i32_arg(addr);
441
+#else
442
+ addr_arg = tcgv_i64_arg(addr);
443
+#endif
444
+ tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
445
+
446
+ if (need_bswap) {
447
+ tcg_gen_bswap64_i64(lo, lo);
448
+ tcg_gen_bswap64_i64(hi, hi);
449
+ }
450
+ } else if (use_two_i64_for_i128(memop)) {
451
+ MemOp mop[2];
452
+ TCGv addr_p8;
453
+ TCGv_i64 x, y;
454
+
455
+ canonicalize_memop_i128_as_i64(mop, memop);
456
+
457
+ /*
458
+ * Since there are no global TCGv_i128, there is no visible state
459
+ * changed if the second load faults. Load directly into the two
460
+ * subwords.
461
+ */
462
+ if ((memop & MO_BSWAP) == MO_LE) {
463
+ x = TCGV128_LOW(val);
464
+ y = TCGV128_HIGH(val);
465
+ } else {
466
+ x = TCGV128_HIGH(val);
467
+ y = TCGV128_LOW(val);
468
+ }
469
+
470
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
471
+
472
+ if ((mop[0] ^ memop) & MO_BSWAP) {
473
+ tcg_gen_bswap64_i64(x, x);
474
+ }
475
+
476
+ addr_p8 = tcg_temp_ebb_new();
477
+ tcg_gen_addi_tl(addr_p8, addr, 8);
478
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
479
+ tcg_temp_free(addr_p8);
480
+
481
+ if ((mop[0] ^ memop) & MO_BSWAP) {
482
+ tcg_gen_bswap64_i64(y, y);
483
+ }
484
+ } else {
485
+ gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
486
+ }
487
+
488
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
489
+}
490
+
491
+void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
492
+{
493
+ const MemOpIdx oi = make_memop_idx(memop, idx);
494
+
495
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
496
+ tcg_debug_assert((memop & MO_SIGN) == 0);
497
+
498
+ tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
499
+ addr = plugin_prep_mem_callbacks(addr);
500
+
501
+ /* TODO: For now, force 32-bit hosts to use the helper. */
502
+
503
+ if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
504
+ TCGv_i64 lo, hi;
505
+ TCGArg addr_arg;
506
+ MemOpIdx adj_oi;
507
+ bool need_bswap = false;
508
+
509
+ if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
510
+ lo = tcg_temp_new_i64();
511
+ hi = tcg_temp_new_i64();
512
+ tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
513
+ tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
514
+ adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
515
+ need_bswap = true;
516
+ } else {
517
+ lo = TCGV128_LOW(val);
518
+ hi = TCGV128_HIGH(val);
519
+ adj_oi = oi;
520
+ }
521
+
522
+#if TARGET_LONG_BITS == 32
523
+ addr_arg = tcgv_i32_arg(addr);
524
+#else
525
+ addr_arg = tcgv_i64_arg(addr);
526
+#endif
527
+ tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
528
+
529
+ if (need_bswap) {
530
+ tcg_temp_free_i64(lo);
531
+ tcg_temp_free_i64(hi);
532
+ }
533
+ } else if (use_two_i64_for_i128(memop)) {
534
+ MemOp mop[2];
535
+ TCGv addr_p8;
536
+ TCGv_i64 x, y;
537
+
538
+ canonicalize_memop_i128_as_i64(mop, memop);
539
+
540
+ if ((memop & MO_BSWAP) == MO_LE) {
541
+ x = TCGV128_LOW(val);
542
+ y = TCGV128_HIGH(val);
543
+ } else {
544
+ x = TCGV128_HIGH(val);
545
+ y = TCGV128_LOW(val);
546
+ }
547
+
548
+ addr_p8 = tcg_temp_ebb_new();
549
+ if ((mop[0] ^ memop) & MO_BSWAP) {
550
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
551
+
552
+ tcg_gen_bswap64_i64(t, x);
553
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
554
+ tcg_gen_bswap64_i64(t, y);
555
+ tcg_gen_addi_tl(addr_p8, addr, 8);
556
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
557
+ tcg_temp_free_i64(t);
558
+ } else {
559
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
560
+ tcg_gen_addi_tl(addr_p8, addr, 8);
561
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
562
+ }
563
+ tcg_temp_free(addr_p8);
564
+ } else {
565
+ gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
566
+ }
567
+
568
+ plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
569
+}
570
+
571
+static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
572
+{
573
+ switch (opc & MO_SSIZE) {
574
+ case MO_SB:
575
+ tcg_gen_ext8s_i32(ret, val);
576
+ break;
577
+ case MO_UB:
578
+ tcg_gen_ext8u_i32(ret, val);
579
+ break;
580
+ case MO_SW:
581
+ tcg_gen_ext16s_i32(ret, val);
582
+ break;
583
+ case MO_UW:
584
+ tcg_gen_ext16u_i32(ret, val);
585
+ break;
586
+ default:
587
+ tcg_gen_mov_i32(ret, val);
588
+ break;
589
+ }
590
+}
591
+
592
+static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
593
+{
594
+ switch (opc & MO_SSIZE) {
595
+ case MO_SB:
596
+ tcg_gen_ext8s_i64(ret, val);
597
+ break;
598
+ case MO_UB:
599
+ tcg_gen_ext8u_i64(ret, val);
600
+ break;
601
+ case MO_SW:
602
+ tcg_gen_ext16s_i64(ret, val);
603
+ break;
604
+ case MO_UW:
605
+ tcg_gen_ext16u_i64(ret, val);
606
+ break;
607
+ case MO_SL:
608
+ tcg_gen_ext32s_i64(ret, val);
609
+ break;
610
+ case MO_UL:
611
+ tcg_gen_ext32u_i64(ret, val);
612
+ break;
613
+ default:
614
+ tcg_gen_mov_i64(ret, val);
615
+ break;
616
+ }
617
+}
618
+
619
+typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
620
+ TCGv_i32, TCGv_i32, TCGv_i32);
621
+typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
622
+ TCGv_i64, TCGv_i64, TCGv_i32);
623
+typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
624
+ TCGv_i128, TCGv_i128, TCGv_i32);
625
+typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
626
+ TCGv_i32, TCGv_i32);
627
+typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
628
+ TCGv_i64, TCGv_i32);
629
+
630
+#ifdef CONFIG_ATOMIC64
631
+# define WITH_ATOMIC64(X) X,
632
+#else
633
+# define WITH_ATOMIC64(X)
634
+#endif
635
+#ifdef CONFIG_CMPXCHG128
636
+# define WITH_ATOMIC128(X) X,
637
+#else
638
+# define WITH_ATOMIC128(X)
639
+#endif
640
+
641
+static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
642
+ [MO_8] = gen_helper_atomic_cmpxchgb,
643
+ [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
644
+ [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
645
+ [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
646
+ [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
647
+ WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
648
+ WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
649
+ WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
650
+ WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
121
+};
651
+};
122
+
652
+
123
+static const test_data_signed test_table_signed[] = {
653
+void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
124
+ /* Positive dividend, positive/negative divisors */
654
+ TCGv_i32 newv, TCGArg idx, MemOp memop)
125
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
655
+{
126
+ 0x0000000000000000LL, 0x0000000000bc614eULL,
656
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
127
+ 0x0000000000000001LL, 0x0000000000000000LL},
657
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
128
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
658
+
129
+ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
659
+ tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
130
+ 0xffffffffffffffffLL, 0x0000000000000000LL},
660
+
131
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
661
+ tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
132
+ 0x0000000000000000LL, 0x00000000005e30a7ULL,
662
+ tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
133
+ 0x0000000000000002LL, 0x0000000000000000LL},
663
+ tcg_gen_qemu_st_i32(t2, addr, idx, memop);
134
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
664
+ tcg_temp_free_i32(t2);
135
+ 0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
665
+
136
+ 0xfffffffffffffffeLL, 0x0000000000000000LL},
666
+ if (memop & MO_SIGN) {
137
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
667
+ tcg_gen_ext_i32(retv, t1, memop);
138
+ 0x0000000000000000LL, 0x0000000000178c29ULL,
668
+ } else {
139
+ 0x0000000000000008LL, 0x0000000000000006LL},
669
+ tcg_gen_mov_i32(retv, t1);
140
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
670
+ }
141
+ 0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
671
+ tcg_temp_free_i32(t1);
142
+ 0xfffffffffffffff8LL, 0x0000000000000006LL},
672
+}
143
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
673
+
144
+ 0x0000000000000000LL, 0x000000000000550dULL,
674
+void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
145
+ 0x0000000000000237LL, 0x0000000000000183LL},
675
+ TCGv_i32 newv, TCGArg idx, MemOp memop)
146
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
676
+{
147
+ 0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
677
+ gen_atomic_cx_i32 gen;
148
+ 0xfffffffffffffdc9LL, 0x0000000000000183LL},
678
+ MemOpIdx oi;
149
+
679
+
150
+ /* Negative dividend, positive/negative divisors */
680
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
151
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
681
+ tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
152
+ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
682
+ return;
153
+ 0x0000000000000001LL, 0x0000000000000000LL},
683
+ }
154
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
684
+
155
+ 0x0000000000000000LL, 0x0000000000bc614eULL,
685
+ memop = tcg_canonicalize_memop(memop, 0, 0);
156
+ 0xffffffffffffffffLL, 0x0000000000000000LL},
686
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
157
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
687
+ tcg_debug_assert(gen != NULL);
158
+ 0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
688
+
159
+ 0x0000000000000002LL, 0x0000000000000000LL},
689
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
160
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
690
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
161
+ 0x0000000000000000LL, 0x00000000005e30a7ULL,
691
+
162
+ 0xfffffffffffffffeLL, 0x0000000000000000LL},
692
+ if (memop & MO_SIGN) {
163
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
693
+ tcg_gen_ext_i32(retv, retv, memop);
164
+ 0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
694
+ }
165
+ 0x0000000000000008LL, 0xfffffffffffffffaLL},
695
+}
166
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
696
+
167
+ 0x0000000000000000LL, 0x0000000000178c29ULL,
697
+void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
168
+ 0xfffffffffffffff8LL, 0xfffffffffffffffaLL},
698
+ TCGv_i64 newv, TCGArg idx, MemOp memop)
169
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
699
+{
170
+ 0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
700
+ TCGv_i64 t1, t2;
171
+ 0x0000000000000237LL, 0xfffffffffffffe7dLL},
701
+
172
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
702
+ if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
173
+ 0x0000000000000000LL, 0x000000000000550dULL,
703
+ tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
174
+ 0xfffffffffffffdc9LL, 0xfffffffffffffe7dLL},
704
+ TCGV_LOW(newv), idx, memop);
175
+};
705
+ if (memop & MO_SIGN) {
176
+
706
+ tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
177
+static void test_divu128(void)
707
+ } else {
178
+{
708
+ tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
179
+ int i;
709
+ }
180
+ uint64_t rem;
710
+ return;
181
+ test_data_unsigned tmp;
711
+ }
182
+
712
+
183
+ for (i = 0; i < ARRAY_SIZE(test_table_unsigned); ++i) {
713
+ t1 = tcg_temp_ebb_new_i64();
184
+ tmp = test_table_unsigned[i];
714
+ t2 = tcg_temp_ebb_new_i64();
185
+
715
+
186
+ rem = divu128(&tmp.low, &tmp.high, tmp.divisor);
716
+ tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
187
+ g_assert_cmpuint(tmp.low, ==, tmp.rlow);
717
+
188
+ g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
718
+ tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
189
+ g_assert_cmpuint(rem, ==, tmp.remainder);
719
+ tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
190
+ }
720
+ tcg_gen_qemu_st_i64(t2, addr, idx, memop);
191
+}
721
+ tcg_temp_free_i64(t2);
192
+
722
+
193
+static void test_divs128(void)
723
+ if (memop & MO_SIGN) {
194
+{
724
+ tcg_gen_ext_i64(retv, t1, memop);
195
+ int i;
725
+ } else {
196
+ int64_t rem;
726
+ tcg_gen_mov_i64(retv, t1);
197
+ test_data_signed tmp;
727
+ }
198
+
728
+ tcg_temp_free_i64(t1);
199
+ for (i = 0; i < ARRAY_SIZE(test_table_signed); ++i) {
729
+}
200
+ tmp = test_table_signed[i];
730
+
201
+
731
+void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
202
+ rem = divs128(&tmp.low, &tmp.high, tmp.divisor);
732
+ TCGv_i64 newv, TCGArg idx, MemOp memop)
203
+ g_assert_cmpuint(tmp.low, ==, tmp.rlow);
733
+{
204
+ g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
734
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
205
+ g_assert_cmpuint(rem, ==, tmp.remainder);
735
+ tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
206
+ }
736
+ return;
207
+}
737
+ }
208
+
738
+
209
+int main(int argc, char **argv)
739
+ if ((memop & MO_SIZE) == MO_64) {
210
+{
740
+ gen_atomic_cx_i64 gen;
211
+ g_test_init(&argc, &argv, NULL);
741
+
212
+ g_test_add_func("/host-utils/test_divu128", test_divu128);
742
+ memop = tcg_canonicalize_memop(memop, 1, 0);
213
+ g_test_add_func("/host-utils/test_divs128", test_divs128);
743
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
214
+ return g_test_run();
744
+ if (gen) {
215
+}
745
+ MemOpIdx oi = make_memop_idx(memop, idx);
216
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
746
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
747
+ return;
748
+ }
749
+
750
+ gen_helper_exit_atomic(cpu_env);
751
+
752
+ /*
753
+ * Produce a result for a well-formed opcode stream. This satisfies
754
+ * liveness for set before used, which happens before this dead code
755
+ * is removed.
756
+ */
757
+ tcg_gen_movi_i64(retv, 0);
758
+ return;
759
+ }
760
+
761
+ if (TCG_TARGET_REG_BITS == 32) {
762
+ tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
763
+ TCGV_LOW(newv), idx, memop);
764
+ if (memop & MO_SIGN) {
765
+ tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
766
+ } else {
767
+ tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
768
+ }
769
+ } else {
770
+ TCGv_i32 c32 = tcg_temp_ebb_new_i32();
771
+ TCGv_i32 n32 = tcg_temp_ebb_new_i32();
772
+ TCGv_i32 r32 = tcg_temp_ebb_new_i32();
773
+
774
+ tcg_gen_extrl_i64_i32(c32, cmpv);
775
+ tcg_gen_extrl_i64_i32(n32, newv);
776
+ tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
777
+ tcg_temp_free_i32(c32);
778
+ tcg_temp_free_i32(n32);
779
+
780
+ tcg_gen_extu_i32_i64(retv, r32);
781
+ tcg_temp_free_i32(r32);
782
+
783
+ if (memop & MO_SIGN) {
784
+ tcg_gen_ext_i64(retv, retv, memop);
785
+ }
786
+ }
787
+}
788
+
789
+void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
790
+ TCGv_i128 newv, TCGArg idx, MemOp memop)
791
+{
792
+ if (TCG_TARGET_REG_BITS == 32) {
793
+ /* Inline expansion below is simply too large for 32-bit hosts. */
794
+ gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
795
+ ? gen_helper_nonatomic_cmpxchgo_le
796
+ : gen_helper_nonatomic_cmpxchgo_be);
797
+ MemOpIdx oi = make_memop_idx(memop, idx);
798
+
799
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
800
+ tcg_debug_assert((memop & MO_SIGN) == 0);
801
+
802
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
803
+ } else {
804
+ TCGv_i128 oldv = tcg_temp_ebb_new_i128();
805
+ TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
806
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
807
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
808
+ TCGv_i64 z = tcg_constant_i64(0);
809
+
810
+ tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
811
+
812
+ /* Compare i128 */
813
+ tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
814
+ tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
815
+ tcg_gen_or_i64(t0, t0, t1);
816
+
817
+ /* tmpv = equal ? newv : oldv */
818
+ tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
819
+ TCGV128_LOW(newv), TCGV128_LOW(oldv));
820
+ tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
821
+ TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
822
+
823
+ /* Unconditional writeback. */
824
+ tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
825
+ tcg_gen_mov_i128(retv, oldv);
826
+
827
+ tcg_temp_free_i64(t0);
828
+ tcg_temp_free_i64(t1);
829
+ tcg_temp_free_i128(tmpv);
830
+ tcg_temp_free_i128(oldv);
831
+ }
832
+}
833
+
834
+void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
835
+ TCGv_i128 newv, TCGArg idx, MemOp memop)
836
+{
837
+ gen_atomic_cx_i128 gen;
838
+
839
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
840
+ tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
841
+ return;
842
+ }
843
+
844
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
845
+ tcg_debug_assert((memop & MO_SIGN) == 0);
846
+ gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
847
+
848
+ if (gen) {
849
+ MemOpIdx oi = make_memop_idx(memop, idx);
850
+ gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
851
+ return;
852
+ }
853
+
854
+ gen_helper_exit_atomic(cpu_env);
855
+
856
+ /*
857
+ * Produce a result for a well-formed opcode stream. This satisfies
858
+ * liveness for set before used, which happens before this dead code
859
+ * is removed.
860
+ */
861
+ tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
862
+ tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
863
+}
864
+
865
+static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
866
+ TCGArg idx, MemOp memop, bool new_val,
867
+ void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
868
+{
869
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
870
+ TCGv_i32 t2 = tcg_temp_ebb_new_i32();
871
+
872
+ memop = tcg_canonicalize_memop(memop, 0, 0);
873
+
874
+ tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
875
+ tcg_gen_ext_i32(t2, val, memop);
876
+ gen(t2, t1, t2);
877
+ tcg_gen_qemu_st_i32(t2, addr, idx, memop);
878
+
879
+ tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
880
+ tcg_temp_free_i32(t1);
881
+ tcg_temp_free_i32(t2);
882
+}
883
+
884
+static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
885
+ TCGArg idx, MemOp memop, void * const table[])
886
+{
887
+ gen_atomic_op_i32 gen;
888
+ MemOpIdx oi;
889
+
890
+ memop = tcg_canonicalize_memop(memop, 0, 0);
891
+
892
+ gen = table[memop & (MO_SIZE | MO_BSWAP)];
893
+ tcg_debug_assert(gen != NULL);
894
+
895
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
896
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
897
+
898
+ if (memop & MO_SIGN) {
899
+ tcg_gen_ext_i32(ret, ret, memop);
900
+ }
901
+}
902
+
903
+static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
904
+ TCGArg idx, MemOp memop, bool new_val,
905
+ void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
906
+{
907
+ TCGv_i64 t1 = tcg_temp_ebb_new_i64();
908
+ TCGv_i64 t2 = tcg_temp_ebb_new_i64();
909
+
910
+ memop = tcg_canonicalize_memop(memop, 1, 0);
911
+
912
+ tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
913
+ tcg_gen_ext_i64(t2, val, memop);
914
+ gen(t2, t1, t2);
915
+ tcg_gen_qemu_st_i64(t2, addr, idx, memop);
916
+
917
+ tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
918
+ tcg_temp_free_i64(t1);
919
+ tcg_temp_free_i64(t2);
920
+}
921
+
922
+static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
923
+ TCGArg idx, MemOp memop, void * const table[])
924
+{
925
+ memop = tcg_canonicalize_memop(memop, 1, 0);
926
+
927
+ if ((memop & MO_SIZE) == MO_64) {
928
+#ifdef CONFIG_ATOMIC64
929
+ gen_atomic_op_i64 gen;
930
+ MemOpIdx oi;
931
+
932
+ gen = table[memop & (MO_SIZE | MO_BSWAP)];
933
+ tcg_debug_assert(gen != NULL);
934
+
935
+ oi = make_memop_idx(memop & ~MO_SIGN, idx);
936
+ gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
937
+#else
938
+ gen_helper_exit_atomic(cpu_env);
939
+ /* Produce a result, so that we have a well-formed opcode stream
940
+ with respect to uses of the result in the (dead) code following. */
941
+ tcg_gen_movi_i64(ret, 0);
942
+#endif /* CONFIG_ATOMIC64 */
943
+ } else {
944
+ TCGv_i32 v32 = tcg_temp_ebb_new_i32();
945
+ TCGv_i32 r32 = tcg_temp_ebb_new_i32();
946
+
947
+ tcg_gen_extrl_i64_i32(v32, val);
948
+ do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
949
+ tcg_temp_free_i32(v32);
950
+
951
+ tcg_gen_extu_i32_i64(ret, r32);
952
+ tcg_temp_free_i32(r32);
953
+
954
+ if (memop & MO_SIGN) {
955
+ tcg_gen_ext_i64(ret, ret, memop);
956
+ }
957
+ }
958
+}
959
+
960
+#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
961
+static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
962
+ [MO_8] = gen_helper_atomic_##NAME##b, \
963
+ [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
964
+ [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
965
+ [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \
966
+ [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \
967
+ WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
968
+ WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
969
+}; \
970
+void tcg_gen_atomic_##NAME##_i32 \
971
+ (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
972
+{ \
973
+ if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
974
+ do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
975
+ } else { \
976
+ do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \
977
+ tcg_gen_##OP##_i32); \
978
+ } \
979
+} \
980
+void tcg_gen_atomic_##NAME##_i64 \
981
+ (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
982
+{ \
983
+ if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
984
+ do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
985
+ } else { \
986
+ do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \
987
+ tcg_gen_##OP##_i64); \
988
+ } \
989
+}
990
+
991
+GEN_ATOMIC_HELPER(fetch_add, add, 0)
992
+GEN_ATOMIC_HELPER(fetch_and, and, 0)
993
+GEN_ATOMIC_HELPER(fetch_or, or, 0)
994
+GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
995
+GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
996
+GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
997
+GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
998
+GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
999
+
1000
+GEN_ATOMIC_HELPER(add_fetch, add, 1)
1001
+GEN_ATOMIC_HELPER(and_fetch, and, 1)
1002
+GEN_ATOMIC_HELPER(or_fetch, or, 1)
1003
+GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
1004
+GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
1005
+GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
1006
+GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
1007
+GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
1008
+
1009
+static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
1010
+{
1011
+ tcg_gen_mov_i32(r, b);
1012
+}
1013
+
1014
+static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
1015
+{
1016
+ tcg_gen_mov_i64(r, b);
1017
+}
1018
+
1019
+GEN_ATOMIC_HELPER(xchg, mov2, 0)
1020
+
1021
+#undef GEN_ATOMIC_HELPER
1022
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
217
index XXXXXXX..XXXXXXX 100644
1023
index XXXXXXX..XXXXXXX 100644
218
--- a/tests/unit/meson.build
1024
--- a/tcg/tcg-op.c
219
+++ b/tests/unit/meson.build
1025
+++ b/tcg/tcg-op.c
220
@@ -XXX,XX +XXX,XX @@ tests = {
1026
@@ -XXX,XX +XXX,XX @@
221
# all code tested by test-x86-cpuid is inside topology.h
1027
#include "tcg/tcg.h"
222
'test-x86-cpuid': [],
1028
#include "tcg/tcg-temp-internal.h"
223
'test-cutils': [],
1029
#include "tcg/tcg-op.h"
224
+ 'test-div128': [],
1030
-#include "tcg/tcg-mo.h"
225
'test-shift128': [],
1031
#include "exec/plugin-gen.h"
226
'test-mul64': [],
1032
#include "tcg-internal.h"
227
# all code tested by test-int128 is inside int128.h
1033
1034
@@ -XXX,XX +XXX,XX @@ void tcg_gen_lookup_and_goto_ptr(void)
1035
tcg_gen_op1i(INDEX_op_goto_ptr, tcgv_ptr_arg(ptr));
1036
tcg_temp_free_ptr(ptr);
1037
}
1038
-
1039
-static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
1040
-{
1041
- /* Trigger the asserts within as early as possible. */
1042
- unsigned a_bits = get_alignment_bits(op);
1043
-
1044
- /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
1045
- if (a_bits == (op & MO_SIZE)) {
1046
- op = (op & ~MO_AMASK) | MO_ALIGN;
1047
- }
1048
-
1049
- switch (op & MO_SIZE) {
1050
- case MO_8:
1051
- op &= ~MO_BSWAP;
1052
- break;
1053
- case MO_16:
1054
- break;
1055
- case MO_32:
1056
- if (!is64) {
1057
- op &= ~MO_SIGN;
1058
- }
1059
- break;
1060
- case MO_64:
1061
- if (is64) {
1062
- op &= ~MO_SIGN;
1063
- break;
1064
- }
1065
- /* fall through */
1066
- default:
1067
- g_assert_not_reached();
1068
- }
1069
- if (st) {
1070
- op &= ~MO_SIGN;
1071
- }
1072
- return op;
1073
-}
1074
-
1075
-static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
1076
- MemOp memop, TCGArg idx)
1077
-{
1078
- MemOpIdx oi = make_memop_idx(memop, idx);
1079
-#if TARGET_LONG_BITS == 32
1080
- tcg_gen_op3i_i32(opc, val, addr, oi);
1081
-#else
1082
- if (TCG_TARGET_REG_BITS == 32) {
1083
- tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
1084
- } else {
1085
- tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
1086
- }
1087
-#endif
1088
-}
1089
-
1090
-static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
1091
- MemOp memop, TCGArg idx)
1092
-{
1093
- MemOpIdx oi = make_memop_idx(memop, idx);
1094
-#if TARGET_LONG_BITS == 32
1095
- if (TCG_TARGET_REG_BITS == 32) {
1096
- tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
1097
- } else {
1098
- tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
1099
- }
1100
-#else
1101
- if (TCG_TARGET_REG_BITS == 32) {
1102
- tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
1103
- TCGV_LOW(addr), TCGV_HIGH(addr), oi);
1104
- } else {
1105
- tcg_gen_op3i_i64(opc, val, addr, oi);
1106
- }
1107
-#endif
1108
-}
1109
-
1110
-static void tcg_gen_req_mo(TCGBar type)
1111
-{
1112
-#ifdef TCG_GUEST_DEFAULT_MO
1113
- type &= TCG_GUEST_DEFAULT_MO;
1114
-#endif
1115
- type &= ~TCG_TARGET_DEFAULT_MO;
1116
- if (type) {
1117
- tcg_gen_mb(type | TCG_BAR_SC);
1118
- }
1119
-}
1120
-
1121
-static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
1122
-{
1123
-#ifdef CONFIG_PLUGIN
1124
- if (tcg_ctx->plugin_insn != NULL) {
1125
- /* Save a copy of the vaddr for use after a load. */
1126
- TCGv temp = tcg_temp_new();
1127
- tcg_gen_mov_tl(temp, vaddr);
1128
- return temp;
1129
- }
1130
-#endif
1131
- return vaddr;
1132
-}
1133
-
1134
-static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
1135
- enum qemu_plugin_mem_rw rw)
1136
-{
1137
-#ifdef CONFIG_PLUGIN
1138
- if (tcg_ctx->plugin_insn != NULL) {
1139
- qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
1140
- plugin_gen_empty_mem_callback(vaddr, info);
1141
- tcg_temp_free(vaddr);
1142
- }
1143
-#endif
1144
-}
1145
-
1146
-void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
1147
-{
1148
- MemOp orig_memop;
1149
- MemOpIdx oi;
1150
-
1151
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1152
- memop = tcg_canonicalize_memop(memop, 0, 0);
1153
- oi = make_memop_idx(memop, idx);
1154
-
1155
- orig_memop = memop;
1156
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1157
- memop &= ~MO_BSWAP;
1158
- /* The bswap primitive benefits from zero-extended input. */
1159
- if ((memop & MO_SSIZE) == MO_SW) {
1160
- memop &= ~MO_SIGN;
1161
- }
1162
- }
1163
-
1164
- addr = plugin_prep_mem_callbacks(addr);
1165
- gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
1166
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
1167
-
1168
- if ((orig_memop ^ memop) & MO_BSWAP) {
1169
- switch (orig_memop & MO_SIZE) {
1170
- case MO_16:
1171
- tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
1172
- ? TCG_BSWAP_IZ | TCG_BSWAP_OS
1173
- : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
1174
- break;
1175
- case MO_32:
1176
- tcg_gen_bswap32_i32(val, val);
1177
- break;
1178
- default:
1179
- g_assert_not_reached();
1180
- }
1181
- }
1182
-}
1183
-
1184
-void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
1185
-{
1186
- TCGv_i32 swap = NULL;
1187
- MemOpIdx oi;
1188
-
1189
- tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1190
- memop = tcg_canonicalize_memop(memop, 0, 1);
1191
- oi = make_memop_idx(memop, idx);
1192
-
1193
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1194
- swap = tcg_temp_ebb_new_i32();
1195
- switch (memop & MO_SIZE) {
1196
- case MO_16:
1197
- tcg_gen_bswap16_i32(swap, val, 0);
1198
- break;
1199
- case MO_32:
1200
- tcg_gen_bswap32_i32(swap, val);
1201
- break;
1202
- default:
1203
- g_assert_not_reached();
1204
- }
1205
- val = swap;
1206
- memop &= ~MO_BSWAP;
1207
- }
1208
-
1209
- addr = plugin_prep_mem_callbacks(addr);
1210
- if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
1211
- gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
1212
- } else {
1213
- gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
1214
- }
1215
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
1216
-
1217
- if (swap) {
1218
- tcg_temp_free_i32(swap);
1219
- }
1220
-}
1221
-
1222
-void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
1223
-{
1224
- MemOp orig_memop;
1225
- MemOpIdx oi;
1226
-
1227
- if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
1228
- tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
1229
- if (memop & MO_SIGN) {
1230
- tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
1231
- } else {
1232
- tcg_gen_movi_i32(TCGV_HIGH(val), 0);
1233
- }
1234
- return;
1235
- }
1236
-
1237
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1238
- memop = tcg_canonicalize_memop(memop, 1, 0);
1239
- oi = make_memop_idx(memop, idx);
1240
-
1241
- orig_memop = memop;
1242
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1243
- memop &= ~MO_BSWAP;
1244
- /* The bswap primitive benefits from zero-extended input. */
1245
- if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
1246
- memop &= ~MO_SIGN;
1247
- }
1248
- }
1249
-
1250
- addr = plugin_prep_mem_callbacks(addr);
1251
- gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
1252
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
1253
-
1254
- if ((orig_memop ^ memop) & MO_BSWAP) {
1255
- int flags = (orig_memop & MO_SIGN
1256
- ? TCG_BSWAP_IZ | TCG_BSWAP_OS
1257
- : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
1258
- switch (orig_memop & MO_SIZE) {
1259
- case MO_16:
1260
- tcg_gen_bswap16_i64(val, val, flags);
1261
- break;
1262
- case MO_32:
1263
- tcg_gen_bswap32_i64(val, val, flags);
1264
- break;
1265
- case MO_64:
1266
- tcg_gen_bswap64_i64(val, val);
1267
- break;
1268
- default:
1269
- g_assert_not_reached();
1270
- }
1271
- }
1272
-}
1273
-
1274
-void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
1275
-{
1276
- TCGv_i64 swap = NULL;
1277
- MemOpIdx oi;
1278
-
1279
- if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
1280
- tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
1281
- return;
1282
- }
1283
-
1284
- tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
1285
- memop = tcg_canonicalize_memop(memop, 1, 1);
1286
- oi = make_memop_idx(memop, idx);
1287
-
1288
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1289
- swap = tcg_temp_ebb_new_i64();
1290
- switch (memop & MO_SIZE) {
1291
- case MO_16:
1292
- tcg_gen_bswap16_i64(swap, val, 0);
1293
- break;
1294
- case MO_32:
1295
- tcg_gen_bswap32_i64(swap, val, 0);
1296
- break;
1297
- case MO_64:
1298
- tcg_gen_bswap64_i64(swap, val);
1299
- break;
1300
- default:
1301
- g_assert_not_reached();
1302
- }
1303
- val = swap;
1304
- memop &= ~MO_BSWAP;
1305
- }
1306
-
1307
- addr = plugin_prep_mem_callbacks(addr);
1308
- gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
1309
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
1310
-
1311
- if (swap) {
1312
- tcg_temp_free_i64(swap);
1313
- }
1314
-}
1315
-
1316
-/*
1317
- * Return true if @mop, without knowledge of the pointer alignment,
1318
- * does not require 16-byte atomicity, and it would be adventagous
1319
- * to avoid a call to a helper function.
1320
- */
1321
-static bool use_two_i64_for_i128(MemOp mop)
1322
-{
1323
-#ifdef CONFIG_SOFTMMU
1324
- /* Two softmmu tlb lookups is larger than one function call. */
1325
- return false;
1326
-#else
1327
- /*
1328
- * For user-only, two 64-bit operations may well be smaller than a call.
1329
- * Determine if that would be legal for the requested atomicity.
1330
- */
1331
- switch (mop & MO_ATOM_MASK) {
1332
- case MO_ATOM_NONE:
1333
- case MO_ATOM_IFALIGN_PAIR:
1334
- return true;
1335
- case MO_ATOM_IFALIGN:
1336
- case MO_ATOM_SUBALIGN:
1337
- case MO_ATOM_WITHIN16:
1338
- case MO_ATOM_WITHIN16_PAIR:
1339
- /* In a serialized context, no atomicity is required. */
1340
- return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
1341
- default:
1342
- g_assert_not_reached();
1343
- }
1344
-#endif
1345
-}
1346
-
1347
-static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
1348
-{
1349
- MemOp mop_1 = orig, mop_2;
1350
-
1351
- tcg_debug_assert((orig & MO_SIZE) == MO_128);
1352
- tcg_debug_assert((orig & MO_SIGN) == 0);
1353
-
1354
- /* Reduce the size to 64-bit. */
1355
- mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
1356
-
1357
- /* Retain the alignment constraints of the original. */
1358
- switch (orig & MO_AMASK) {
1359
- case MO_UNALN:
1360
- case MO_ALIGN_2:
1361
- case MO_ALIGN_4:
1362
- mop_2 = mop_1;
1363
- break;
1364
- case MO_ALIGN_8:
1365
- /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
1366
- mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
1367
- mop_2 = mop_1;
1368
- break;
1369
- case MO_ALIGN:
1370
- /* Second has 8-byte alignment; first has 16-byte alignment. */
1371
- mop_2 = mop_1;
1372
- mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
1373
- break;
1374
- case MO_ALIGN_16:
1375
- case MO_ALIGN_32:
1376
- case MO_ALIGN_64:
1377
- /* Second has 8-byte alignment; first retains original. */
1378
- mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
1379
- break;
1380
- default:
1381
- g_assert_not_reached();
1382
- }
1383
-
1384
- /* Use a memory ordering implemented by the host. */
1385
- if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
1386
- mop_1 &= ~MO_BSWAP;
1387
- mop_2 &= ~MO_BSWAP;
1388
- }
1389
-
1390
- ret[0] = mop_1;
1391
- ret[1] = mop_2;
1392
-}
1393
-
1394
-#if TARGET_LONG_BITS == 64
1395
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
1396
-#else
1397
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
1398
-#endif
1399
-
1400
-void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
1401
-{
1402
- const MemOpIdx oi = make_memop_idx(memop, idx);
1403
-
1404
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1405
- tcg_debug_assert((memop & MO_SIGN) == 0);
1406
-
1407
- tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
1408
- addr = plugin_prep_mem_callbacks(addr);
1409
-
1410
- /* TODO: For now, force 32-bit hosts to use the helper. */
1411
- if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
1412
- TCGv_i64 lo, hi;
1413
- TCGArg addr_arg;
1414
- MemOpIdx adj_oi;
1415
- bool need_bswap = false;
1416
-
1417
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1418
- lo = TCGV128_HIGH(val);
1419
- hi = TCGV128_LOW(val);
1420
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
1421
- need_bswap = true;
1422
- } else {
1423
- lo = TCGV128_LOW(val);
1424
- hi = TCGV128_HIGH(val);
1425
- adj_oi = oi;
1426
- }
1427
-
1428
-#if TARGET_LONG_BITS == 32
1429
- addr_arg = tcgv_i32_arg(addr);
1430
-#else
1431
- addr_arg = tcgv_i64_arg(addr);
1432
-#endif
1433
- tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
1434
-
1435
- if (need_bswap) {
1436
- tcg_gen_bswap64_i64(lo, lo);
1437
- tcg_gen_bswap64_i64(hi, hi);
1438
- }
1439
- } else if (use_two_i64_for_i128(memop)) {
1440
- MemOp mop[2];
1441
- TCGv addr_p8;
1442
- TCGv_i64 x, y;
1443
-
1444
- canonicalize_memop_i128_as_i64(mop, memop);
1445
-
1446
- /*
1447
- * Since there are no global TCGv_i128, there is no visible state
1448
- * changed if the second load faults. Load directly into the two
1449
- * subwords.
1450
- */
1451
- if ((memop & MO_BSWAP) == MO_LE) {
1452
- x = TCGV128_LOW(val);
1453
- y = TCGV128_HIGH(val);
1454
- } else {
1455
- x = TCGV128_HIGH(val);
1456
- y = TCGV128_LOW(val);
1457
- }
1458
-
1459
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
1460
-
1461
- if ((mop[0] ^ memop) & MO_BSWAP) {
1462
- tcg_gen_bswap64_i64(x, x);
1463
- }
1464
-
1465
- addr_p8 = tcg_temp_ebb_new();
1466
- tcg_gen_addi_tl(addr_p8, addr, 8);
1467
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
1468
- tcg_temp_free(addr_p8);
1469
-
1470
- if ((mop[0] ^ memop) & MO_BSWAP) {
1471
- tcg_gen_bswap64_i64(y, y);
1472
- }
1473
- } else {
1474
- gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
1475
- }
1476
-
1477
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
1478
-}
1479
-
1480
-void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
1481
-{
1482
- const MemOpIdx oi = make_memop_idx(memop, idx);
1483
-
1484
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1485
- tcg_debug_assert((memop & MO_SIGN) == 0);
1486
-
1487
- tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
1488
- addr = plugin_prep_mem_callbacks(addr);
1489
-
1490
- /* TODO: For now, force 32-bit hosts to use the helper. */
1491
-
1492
- if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
1493
- TCGv_i64 lo, hi;
1494
- TCGArg addr_arg;
1495
- MemOpIdx adj_oi;
1496
- bool need_bswap = false;
1497
-
1498
- if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
1499
- lo = tcg_temp_new_i64();
1500
- hi = tcg_temp_new_i64();
1501
- tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
1502
- tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
1503
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
1504
- need_bswap = true;
1505
- } else {
1506
- lo = TCGV128_LOW(val);
1507
- hi = TCGV128_HIGH(val);
1508
- adj_oi = oi;
1509
- }
1510
-
1511
-#if TARGET_LONG_BITS == 32
1512
- addr_arg = tcgv_i32_arg(addr);
1513
-#else
1514
- addr_arg = tcgv_i64_arg(addr);
1515
-#endif
1516
- tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
1517
-
1518
- if (need_bswap) {
1519
- tcg_temp_free_i64(lo);
1520
- tcg_temp_free_i64(hi);
1521
- }
1522
- } else if (use_two_i64_for_i128(memop)) {
1523
- MemOp mop[2];
1524
- TCGv addr_p8;
1525
- TCGv_i64 x, y;
1526
-
1527
- canonicalize_memop_i128_as_i64(mop, memop);
1528
-
1529
- if ((memop & MO_BSWAP) == MO_LE) {
1530
- x = TCGV128_LOW(val);
1531
- y = TCGV128_HIGH(val);
1532
- } else {
1533
- x = TCGV128_HIGH(val);
1534
- y = TCGV128_LOW(val);
1535
- }
1536
-
1537
- addr_p8 = tcg_temp_ebb_new();
1538
- if ((mop[0] ^ memop) & MO_BSWAP) {
1539
- TCGv_i64 t = tcg_temp_ebb_new_i64();
1540
-
1541
- tcg_gen_bswap64_i64(t, x);
1542
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
1543
- tcg_gen_bswap64_i64(t, y);
1544
- tcg_gen_addi_tl(addr_p8, addr, 8);
1545
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
1546
- tcg_temp_free_i64(t);
1547
- } else {
1548
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
1549
- tcg_gen_addi_tl(addr_p8, addr, 8);
1550
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
1551
- }
1552
- tcg_temp_free(addr_p8);
1553
- } else {
1554
- gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
1555
- }
1556
-
1557
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
1558
-}
1559
-
1560
-static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
1561
-{
1562
- switch (opc & MO_SSIZE) {
1563
- case MO_SB:
1564
- tcg_gen_ext8s_i32(ret, val);
1565
- break;
1566
- case MO_UB:
1567
- tcg_gen_ext8u_i32(ret, val);
1568
- break;
1569
- case MO_SW:
1570
- tcg_gen_ext16s_i32(ret, val);
1571
- break;
1572
- case MO_UW:
1573
- tcg_gen_ext16u_i32(ret, val);
1574
- break;
1575
- default:
1576
- tcg_gen_mov_i32(ret, val);
1577
- break;
1578
- }
1579
-}
1580
-
1581
-static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
1582
-{
1583
- switch (opc & MO_SSIZE) {
1584
- case MO_SB:
1585
- tcg_gen_ext8s_i64(ret, val);
1586
- break;
1587
- case MO_UB:
1588
- tcg_gen_ext8u_i64(ret, val);
1589
- break;
1590
- case MO_SW:
1591
- tcg_gen_ext16s_i64(ret, val);
1592
- break;
1593
- case MO_UW:
1594
- tcg_gen_ext16u_i64(ret, val);
1595
- break;
1596
- case MO_SL:
1597
- tcg_gen_ext32s_i64(ret, val);
1598
- break;
1599
- case MO_UL:
1600
- tcg_gen_ext32u_i64(ret, val);
1601
- break;
1602
- default:
1603
- tcg_gen_mov_i64(ret, val);
1604
- break;
1605
- }
1606
-}
1607
-
1608
-typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
1609
- TCGv_i32, TCGv_i32, TCGv_i32);
1610
-typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
1611
- TCGv_i64, TCGv_i64, TCGv_i32);
1612
-typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
1613
- TCGv_i128, TCGv_i128, TCGv_i32);
1614
-typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
1615
- TCGv_i32, TCGv_i32);
1616
-typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
1617
- TCGv_i64, TCGv_i32);
1618
-
1619
-#ifdef CONFIG_ATOMIC64
1620
-# define WITH_ATOMIC64(X) X,
1621
-#else
1622
-# define WITH_ATOMIC64(X)
1623
-#endif
1624
-#ifdef CONFIG_CMPXCHG128
1625
-# define WITH_ATOMIC128(X) X,
1626
-#else
1627
-# define WITH_ATOMIC128(X)
1628
-#endif
1629
-
1630
-static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
1631
- [MO_8] = gen_helper_atomic_cmpxchgb,
1632
- [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
1633
- [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
1634
- [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
1635
- [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
1636
- WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
1637
- WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
1638
- WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
1639
- WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
1640
-};
1641
-
1642
-void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
1643
- TCGv_i32 newv, TCGArg idx, MemOp memop)
1644
-{
1645
- TCGv_i32 t1 = tcg_temp_ebb_new_i32();
1646
- TCGv_i32 t2 = tcg_temp_ebb_new_i32();
1647
-
1648
- tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
1649
-
1650
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
1651
- tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
1652
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
1653
- tcg_temp_free_i32(t2);
1654
-
1655
- if (memop & MO_SIGN) {
1656
- tcg_gen_ext_i32(retv, t1, memop);
1657
- } else {
1658
- tcg_gen_mov_i32(retv, t1);
1659
- }
1660
- tcg_temp_free_i32(t1);
1661
-}
1662
-
1663
-void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
1664
- TCGv_i32 newv, TCGArg idx, MemOp memop)
1665
-{
1666
- gen_atomic_cx_i32 gen;
1667
- MemOpIdx oi;
1668
-
1669
- if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1670
- tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
1671
- return;
1672
- }
1673
-
1674
- memop = tcg_canonicalize_memop(memop, 0, 0);
1675
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1676
- tcg_debug_assert(gen != NULL);
1677
-
1678
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
1679
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1680
-
1681
- if (memop & MO_SIGN) {
1682
- tcg_gen_ext_i32(retv, retv, memop);
1683
- }
1684
-}
1685
-
1686
-void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
1687
- TCGv_i64 newv, TCGArg idx, MemOp memop)
1688
-{
1689
- TCGv_i64 t1, t2;
1690
-
1691
- if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
1692
- tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
1693
- TCGV_LOW(newv), idx, memop);
1694
- if (memop & MO_SIGN) {
1695
- tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
1696
- } else {
1697
- tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
1698
- }
1699
- return;
1700
- }
1701
-
1702
- t1 = tcg_temp_ebb_new_i64();
1703
- t2 = tcg_temp_ebb_new_i64();
1704
-
1705
- tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
1706
-
1707
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
1708
- tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
1709
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
1710
- tcg_temp_free_i64(t2);
1711
-
1712
- if (memop & MO_SIGN) {
1713
- tcg_gen_ext_i64(retv, t1, memop);
1714
- } else {
1715
- tcg_gen_mov_i64(retv, t1);
1716
- }
1717
- tcg_temp_free_i64(t1);
1718
-}
1719
-
1720
-void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
1721
- TCGv_i64 newv, TCGArg idx, MemOp memop)
1722
-{
1723
- if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1724
- tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
1725
- return;
1726
- }
1727
-
1728
- if ((memop & MO_SIZE) == MO_64) {
1729
- gen_atomic_cx_i64 gen;
1730
-
1731
- memop = tcg_canonicalize_memop(memop, 1, 0);
1732
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1733
- if (gen) {
1734
- MemOpIdx oi = make_memop_idx(memop, idx);
1735
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1736
- return;
1737
- }
1738
-
1739
- gen_helper_exit_atomic(cpu_env);
1740
-
1741
- /*
1742
- * Produce a result for a well-formed opcode stream. This satisfies
1743
- * liveness for set before used, which happens before this dead code
1744
- * is removed.
1745
- */
1746
- tcg_gen_movi_i64(retv, 0);
1747
- return;
1748
- }
1749
-
1750
- if (TCG_TARGET_REG_BITS == 32) {
1751
- tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
1752
- TCGV_LOW(newv), idx, memop);
1753
- if (memop & MO_SIGN) {
1754
- tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
1755
- } else {
1756
- tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
1757
- }
1758
- } else {
1759
- TCGv_i32 c32 = tcg_temp_ebb_new_i32();
1760
- TCGv_i32 n32 = tcg_temp_ebb_new_i32();
1761
- TCGv_i32 r32 = tcg_temp_ebb_new_i32();
1762
-
1763
- tcg_gen_extrl_i64_i32(c32, cmpv);
1764
- tcg_gen_extrl_i64_i32(n32, newv);
1765
- tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
1766
- tcg_temp_free_i32(c32);
1767
- tcg_temp_free_i32(n32);
1768
-
1769
- tcg_gen_extu_i32_i64(retv, r32);
1770
- tcg_temp_free_i32(r32);
1771
-
1772
- if (memop & MO_SIGN) {
1773
- tcg_gen_ext_i64(retv, retv, memop);
1774
- }
1775
- }
1776
-}
1777
-
1778
-void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
1779
- TCGv_i128 newv, TCGArg idx, MemOp memop)
1780
-{
1781
- if (TCG_TARGET_REG_BITS == 32) {
1782
- /* Inline expansion below is simply too large for 32-bit hosts. */
1783
- gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
1784
- ? gen_helper_nonatomic_cmpxchgo_le
1785
- : gen_helper_nonatomic_cmpxchgo_be);
1786
- MemOpIdx oi = make_memop_idx(memop, idx);
1787
-
1788
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1789
- tcg_debug_assert((memop & MO_SIGN) == 0);
1790
-
1791
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1792
- } else {
1793
- TCGv_i128 oldv = tcg_temp_ebb_new_i128();
1794
- TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
1795
- TCGv_i64 t0 = tcg_temp_ebb_new_i64();
1796
- TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1797
- TCGv_i64 z = tcg_constant_i64(0);
1798
-
1799
- tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
1800
-
1801
- /* Compare i128 */
1802
- tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
1803
- tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
1804
- tcg_gen_or_i64(t0, t0, t1);
1805
-
1806
- /* tmpv = equal ? newv : oldv */
1807
- tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
1808
- TCGV128_LOW(newv), TCGV128_LOW(oldv));
1809
- tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
1810
- TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
1811
-
1812
- /* Unconditional writeback. */
1813
- tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
1814
- tcg_gen_mov_i128(retv, oldv);
1815
-
1816
- tcg_temp_free_i64(t0);
1817
- tcg_temp_free_i64(t1);
1818
- tcg_temp_free_i128(tmpv);
1819
- tcg_temp_free_i128(oldv);
1820
- }
1821
-}
1822
-
1823
-void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
1824
- TCGv_i128 newv, TCGArg idx, MemOp memop)
1825
-{
1826
- gen_atomic_cx_i128 gen;
1827
-
1828
- if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
1829
- tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
1830
- return;
1831
- }
1832
-
1833
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
1834
- tcg_debug_assert((memop & MO_SIGN) == 0);
1835
- gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
1836
-
1837
- if (gen) {
1838
- MemOpIdx oi = make_memop_idx(memop, idx);
1839
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
1840
- return;
1841
- }
1842
-
1843
- gen_helper_exit_atomic(cpu_env);
1844
-
1845
- /*
1846
- * Produce a result for a well-formed opcode stream. This satisfies
1847
- * liveness for set before used, which happens before this dead code
1848
- * is removed.
1849
- */
1850
- tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
1851
- tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
1852
-}
1853
-
1854
-static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
1855
- TCGArg idx, MemOp memop, bool new_val,
1856
- void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
1857
-{
1858
- TCGv_i32 t1 = tcg_temp_ebb_new_i32();
1859
- TCGv_i32 t2 = tcg_temp_ebb_new_i32();
1860
-
1861
- memop = tcg_canonicalize_memop(memop, 0, 0);
1862
-
1863
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
1864
- tcg_gen_ext_i32(t2, val, memop);
1865
- gen(t2, t1, t2);
1866
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
1867
-
1868
- tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
1869
- tcg_temp_free_i32(t1);
1870
- tcg_temp_free_i32(t2);
1871
-}
1872
-
1873
-static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
1874
- TCGArg idx, MemOp memop, void * const table[])
1875
-{
1876
- gen_atomic_op_i32 gen;
1877
- MemOpIdx oi;
1878
-
1879
- memop = tcg_canonicalize_memop(memop, 0, 0);
1880
-
1881
- gen = table[memop & (MO_SIZE | MO_BSWAP)];
1882
- tcg_debug_assert(gen != NULL);
1883
-
1884
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
1885
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
1886
-
1887
- if (memop & MO_SIGN) {
1888
- tcg_gen_ext_i32(ret, ret, memop);
1889
- }
1890
-}
1891
-
1892
-static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
1893
- TCGArg idx, MemOp memop, bool new_val,
1894
- void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
1895
-{
1896
- TCGv_i64 t1 = tcg_temp_ebb_new_i64();
1897
- TCGv_i64 t2 = tcg_temp_ebb_new_i64();
1898
-
1899
- memop = tcg_canonicalize_memop(memop, 1, 0);
1900
-
1901
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
1902
- tcg_gen_ext_i64(t2, val, memop);
1903
- gen(t2, t1, t2);
1904
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
1905
-
1906
- tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
1907
- tcg_temp_free_i64(t1);
1908
- tcg_temp_free_i64(t2);
1909
-}
1910
-
1911
-static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
1912
- TCGArg idx, MemOp memop, void * const table[])
1913
-{
1914
- memop = tcg_canonicalize_memop(memop, 1, 0);
1915
-
1916
- if ((memop & MO_SIZE) == MO_64) {
1917
-#ifdef CONFIG_ATOMIC64
1918
- gen_atomic_op_i64 gen;
1919
- MemOpIdx oi;
1920
-
1921
- gen = table[memop & (MO_SIZE | MO_BSWAP)];
1922
- tcg_debug_assert(gen != NULL);
1923
-
1924
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
1925
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
1926
-#else
1927
- gen_helper_exit_atomic(cpu_env);
1928
- /* Produce a result, so that we have a well-formed opcode stream
1929
- with respect to uses of the result in the (dead) code following. */
1930
- tcg_gen_movi_i64(ret, 0);
1931
-#endif /* CONFIG_ATOMIC64 */
1932
- } else {
1933
- TCGv_i32 v32 = tcg_temp_ebb_new_i32();
1934
- TCGv_i32 r32 = tcg_temp_ebb_new_i32();
1935
-
1936
- tcg_gen_extrl_i64_i32(v32, val);
1937
- do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
1938
- tcg_temp_free_i32(v32);
1939
-
1940
- tcg_gen_extu_i32_i64(ret, r32);
1941
- tcg_temp_free_i32(r32);
1942
-
1943
- if (memop & MO_SIGN) {
1944
- tcg_gen_ext_i64(ret, ret, memop);
1945
- }
1946
- }
1947
-}
1948
-
1949
-#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \
1950
-static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
1951
- [MO_8] = gen_helper_atomic_##NAME##b, \
1952
- [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \
1953
- [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \
1954
- [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \
1955
- [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \
1956
- WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
1957
- WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
1958
-}; \
1959
-void tcg_gen_atomic_##NAME##_i32 \
1960
- (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
1961
-{ \
1962
- if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
1963
- do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
1964
- } else { \
1965
- do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \
1966
- tcg_gen_##OP##_i32); \
1967
- } \
1968
-} \
1969
-void tcg_gen_atomic_##NAME##_i64 \
1970
- (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
1971
-{ \
1972
- if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
1973
- do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
1974
- } else { \
1975
- do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \
1976
- tcg_gen_##OP##_i64); \
1977
- } \
1978
-}
1979
-
1980
-GEN_ATOMIC_HELPER(fetch_add, add, 0)
1981
-GEN_ATOMIC_HELPER(fetch_and, and, 0)
1982
-GEN_ATOMIC_HELPER(fetch_or, or, 0)
1983
-GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
1984
-GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
1985
-GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
1986
-GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
1987
-GEN_ATOMIC_HELPER(fetch_umax, umax, 0)
1988
-
1989
-GEN_ATOMIC_HELPER(add_fetch, add, 1)
1990
-GEN_ATOMIC_HELPER(and_fetch, and, 1)
1991
-GEN_ATOMIC_HELPER(or_fetch, or, 1)
1992
-GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
1993
-GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
1994
-GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
1995
-GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
1996
-GEN_ATOMIC_HELPER(umax_fetch, umax, 1)
1997
-
1998
-static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
1999
-{
2000
- tcg_gen_mov_i32(r, b);
2001
-}
2002
-
2003
-static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
2004
-{
2005
- tcg_gen_mov_i64(r, b);
2006
-}
2007
-
2008
-GEN_ATOMIC_HELPER(xchg, mov2, 0)
2009
-
2010
-#undef GEN_ATOMIC_HELPER
2011
diff --git a/tcg/meson.build b/tcg/meson.build
2012
index XXXXXXX..XXXXXXX 100644
2013
--- a/tcg/meson.build
2014
+++ b/tcg/meson.build
2015
@@ -XXX,XX +XXX,XX @@ tcg_ss.add(files(
2016
'tcg.c',
2017
'tcg-common.c',
2018
'tcg-op.c',
2019
+ 'tcg-op-ldst.c',
2020
'tcg-op-gvec.c',
2021
'tcg-op-vec.c',
2022
))
228
--
2023
--
229
2.25.1
2024
2.34.1
230
2025
231
2026
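A side note on the two-halves path removed above (the use_two_i64_for_i128 case of tcg_gen_qemu_ld_i128): the 16-byte value is simply fetched as two 8-byte subwords, the first at addr and the second at addr + 8, with the low/high assignment chosen by MO_BSWAP. The host-level analogue below is only an illustrative sketch, not code from this series; the struct and function names are invented for this note and a little-endian host is assumed.

    #include <stdint.h>
    #include <string.h>

    /* Illustrative sketch only: read a 16-byte little-endian value as two
     * 8-byte subwords, mirroring the addr / addr + 8 split used above.
     * Assumes a little-endian host; names are invented for this note. */
    typedef struct { uint64_t lo, hi; } u128_parts;

    static u128_parts load16_le(const void *addr)
    {
        u128_parts v;

        memcpy(&v.lo, addr, 8);                    /* first subword        */
        memcpy(&v.hi, (const char *)addr + 8, 8);  /* second, at addr + 8  */
        return v;
    }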
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
We already pass uint64_t to restore_state_to_opc; this changes all
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
of the other uses from insn_start through the encoding to decoding.
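To see why the full 64-bit width matters here, the sketch below round-trips a guest PC that does not fit in 32 bits through a signed-LEB128 encoder and decoder of the same shape as the ones touched by this patch. It is a standalone, hedged illustration, not the series' code verbatim; the function names and the fixed 64-bit width are assumptions made for this note.

    #include <assert.h>
    #include <stdint.h>

    /* Self-contained sketch (not the patch's code verbatim): a signed-LEB128
     * round trip at full 64-bit width, which is what the switch from
     * target_long to int64_t guarantees for 64-bit guest addresses. */
    static uint8_t *sleb128_put(uint8_t *p, int64_t val)
    {
        int more;

        do {
            int byte = val & 0x7f;

            val >>= 7;              /* arithmetic shift on the signed value */
            more = !((val == 0 && !(byte & 0x40)) ||
                     (val == -1 && (byte & 0x40)));
            *p++ = byte | (more ? 0x80 : 0);
        } while (more);
        return p;
    }

    static int64_t sleb128_get(const uint8_t **pp)
    {
        const uint8_t *p = *pp;
        int64_t val = 0;
        int byte, shift = 0;

        do {
            byte = *p++;
            val |= (int64_t)(byte & 0x7f) << shift;
            shift += 7;
        } while (byte & 0x80);
        if (shift < 64 && (byte & 0x40)) {
            val |= -(int64_t)1 << shift;   /* sign-extend the final byte */
        }
        *pp = p;
        return val;
    }

    int main(void)
    {
        uint8_t buf[10];
        const uint8_t *q = buf;
        int64_t pc = INT64_C(0x123456789abcdef0);  /* needs more than 32 bits */

        sleb128_put(buf, pc);
        assert(sleb128_get(&q) == pc);
        return 0;
    }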
3
4
Reviewed-by: Anton Johansson <anjo@rev.ng>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
7
---
5
tcg/optimize.c | 37 +++++++++++++++++++++----------------
8
include/tcg/tcg-op.h | 39 +++++++++------------------------------
6
1 file changed, 21 insertions(+), 16 deletions(-)
9
include/tcg/tcg-opc.h | 2 +-
7
10
include/tcg/tcg.h | 30 +++++++++++++++---------------
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
accel/tcg/translate-all.c | 28 ++++++++++++++++------------
9
index XXXXXXX..XXXXXXX 100644
12
tcg/tcg.c | 18 ++++--------------
10
--- a/tcg/optimize.c
13
5 files changed, 45 insertions(+), 72 deletions(-)
11
+++ b/tcg/optimize.c
14
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
15
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
13
return fold_const2(ctx, op);
16
index XXXXXXX..XXXXXXX 100644
14
}
17
--- a/include/tcg/tcg-op.h
15
18
+++ b/include/tcg/tcg-op.h
16
+static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
19
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi)
17
+{
20
#endif
18
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
21
19
+ uint32_t a = arg_info(op->args[2])->val;
22
#if TARGET_INSN_START_WORDS == 1
20
+ uint32_t b = arg_info(op->args[3])->val;
23
-# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
21
+ uint64_t r = (uint64_t)a * b;
24
static inline void tcg_gen_insn_start(target_ulong pc)
22
+ TCGArg rl, rh;
25
{
23
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
26
- tcg_gen_op1(INDEX_op_insn_start, pc);
24
+
27
+ TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 64 / TCG_TARGET_REG_BITS);
25
+ rl = op->args[0];
28
+ tcg_set_insn_start_param(op, 0, pc);
26
+ rh = op->args[1];
29
}
27
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
30
-# else
28
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
31
-static inline void tcg_gen_insn_start(target_ulong pc)
29
+ return true;
32
-{
33
- tcg_gen_op2(INDEX_op_insn_start, (uint32_t)pc, (uint32_t)(pc >> 32));
34
-}
35
-# endif
36
#elif TARGET_INSN_START_WORDS == 2
37
-# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
38
static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
39
{
40
- tcg_gen_op2(INDEX_op_insn_start, pc, a1);
41
+ TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 2 * 64 / TCG_TARGET_REG_BITS);
42
+ tcg_set_insn_start_param(op, 0, pc);
43
+ tcg_set_insn_start_param(op, 1, a1);
44
}
45
-# else
46
-static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1)
47
-{
48
- tcg_gen_op4(INDEX_op_insn_start,
49
- (uint32_t)pc, (uint32_t)(pc >> 32),
50
- (uint32_t)a1, (uint32_t)(a1 >> 32));
51
-}
52
-# endif
53
#elif TARGET_INSN_START_WORDS == 3
54
-# if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
55
static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
56
target_ulong a2)
57
{
58
- tcg_gen_op3(INDEX_op_insn_start, pc, a1, a2);
59
+ TCGOp *op = tcg_emit_op(INDEX_op_insn_start, 3 * 64 / TCG_TARGET_REG_BITS);
60
+ tcg_set_insn_start_param(op, 0, pc);
61
+ tcg_set_insn_start_param(op, 1, a1);
62
+ tcg_set_insn_start_param(op, 2, a2);
63
}
64
-# else
65
-static inline void tcg_gen_insn_start(target_ulong pc, target_ulong a1,
66
- target_ulong a2)
67
-{
68
- tcg_gen_op6(INDEX_op_insn_start,
69
- (uint32_t)pc, (uint32_t)(pc >> 32),
70
- (uint32_t)a1, (uint32_t)(a1 >> 32),
71
- (uint32_t)a2, (uint32_t)(a2 >> 32));
72
-}
73
-# endif
74
#else
75
# error "Unhandled number of operands to insn_start"
76
#endif
77
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
78
index XXXXXXX..XXXXXXX 100644
79
--- a/include/tcg/tcg-opc.h
80
+++ b/include/tcg/tcg-opc.h
81
@@ -XXX,XX +XXX,XX @@ DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
82
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
83
84
/* QEMU specific */
85
-DEF(insn_start, 0, 0, TLADDR_ARGS * TARGET_INSN_START_WORDS,
86
+DEF(insn_start, 0, 0, DATA64_ARGS * TARGET_INSN_START_WORDS,
87
TCG_OPF_NOT_PRESENT)
88
DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
89
DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
90
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
91
index XXXXXXX..XXXXXXX 100644
92
--- a/include/tcg/tcg.h
93
+++ b/include/tcg/tcg.h
94
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
95
TCGTemp *reg_to_temp[TCG_TARGET_NB_REGS];
96
97
uint16_t gen_insn_end_off[TCG_MAX_INSNS];
98
- target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
99
+ uint64_t gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
100
101
/* Exit to translator on overflow. */
102
sigjmp_buf jmp_trans;
103
@@ -XXX,XX +XXX,XX @@ static inline void tcg_set_insn_param(TCGOp *op, int arg, TCGArg v)
104
op->args[arg] = v;
105
}
106
107
-static inline target_ulong tcg_get_insn_start_param(TCGOp *op, int arg)
108
+static inline uint64_t tcg_get_insn_start_param(TCGOp *op, int arg)
109
{
110
-#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
111
- return tcg_get_insn_param(op, arg);
112
-#else
113
- return tcg_get_insn_param(op, arg * 2) |
114
- ((uint64_t)tcg_get_insn_param(op, arg * 2 + 1) << 32);
115
-#endif
116
+ if (TCG_TARGET_REG_BITS == 64) {
117
+ return tcg_get_insn_param(op, arg);
118
+ } else {
119
+ return deposit64(tcg_get_insn_param(op, arg * 2), 32, 32,
120
+ tcg_get_insn_param(op, arg * 2 + 1));
30
+ }
121
+ }
31
+ return false;
122
}
32
+}
123
33
+
124
-static inline void tcg_set_insn_start_param(TCGOp *op, int arg, target_ulong v)
34
static bool fold_nand(OptContext *ctx, TCGOp *op)
125
+static inline void tcg_set_insn_start_param(TCGOp *op, int arg, uint64_t v)
35
{
126
{
36
return fold_const2(ctx, op);
127
-#if TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
37
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
128
- tcg_set_insn_param(op, arg, v);
129
-#else
130
- tcg_set_insn_param(op, arg * 2, v);
131
- tcg_set_insn_param(op, arg * 2 + 1, v >> 32);
132
-#endif
133
+ if (TCG_TARGET_REG_BITS == 64) {
134
+ tcg_set_insn_param(op, arg, v);
135
+ } else {
136
+ tcg_set_insn_param(op, arg * 2, v);
137
+ tcg_set_insn_param(op, arg * 2 + 1, v >> 32);
138
+ }
139
}
140
141
/* The last op that was emitted. */
142
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/accel/tcg/translate-all.c
145
+++ b/accel/tcg/translate-all.c
146
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
147
148
TBContext tb_ctx;
149
150
-/* Encode VAL as a signed leb128 sequence at P.
151
- Return P incremented past the encoded value. */
152
-static uint8_t *encode_sleb128(uint8_t *p, target_long val)
153
+/*
154
+ * Encode VAL as a signed leb128 sequence at P.
155
+ * Return P incremented past the encoded value.
156
+ */
157
+static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
158
{
159
int more, byte;
160
161
@@ -XXX,XX +XXX,XX @@ static uint8_t *encode_sleb128(uint8_t *p, target_long val)
162
return p;
163
}
164
165
-/* Decode a signed leb128 sequence at *PP; increment *PP past the
166
- decoded value. Return the decoded value. */
167
-static target_long decode_sleb128(const uint8_t **pp)
168
+/*
169
+ * Decode a signed leb128 sequence at *PP; increment *PP past the
170
+ * decoded value. Return the decoded value.
171
+ */
172
+static int64_t decode_sleb128(const uint8_t **pp)
173
{
174
const uint8_t *p = *pp;
175
- target_long val = 0;
176
+ int64_t val = 0;
177
int byte, shift = 0;
178
179
do {
180
byte = *p++;
181
- val |= (target_ulong)(byte & 0x7f) << shift;
182
+ val |= (int64_t)(byte & 0x7f) << shift;
183
shift += 7;
184
} while (byte & 0x80);
185
if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
186
- val |= -(target_ulong)1 << shift;
187
+ val |= -(int64_t)1 << shift;
188
}
189
190
*pp = p;
191
@@ -XXX,XX +XXX,XX @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
192
int i, j, n;
193
194
for (i = 0, n = tb->icount; i < n; ++i) {
195
- target_ulong prev;
196
+ uint64_t prev;
197
198
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
199
if (i == 0) {
200
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
201
/* Dump header and the first instruction */
202
fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
203
fprintf(logfile,
204
- " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
205
+ " -- guest addr 0x%016" PRIx64 " + tb prologue\n",
206
tcg_ctx->gen_insn_data[insn][0]);
207
chunk_start = tcg_ctx->gen_insn_end_off[insn];
208
disas(logfile, tb->tc.ptr, chunk_start);
209
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
210
while (insn < tb->icount) {
211
size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
212
if (chunk_end > chunk_start) {
213
- fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n",
214
+ fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
215
tcg_ctx->gen_insn_data[insn][0]);
216
disas(logfile, tb->tc.ptr + chunk_start,
217
chunk_end - chunk_start);
218
diff --git a/tcg/tcg.c b/tcg/tcg.c
219
index XXXXXXX..XXXXXXX 100644
220
--- a/tcg/tcg.c
221
+++ b/tcg/tcg.c
222
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
223
col += ne_fprintf(f, "\n ----");
224
225
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
226
- target_ulong a;
227
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
228
- a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
229
-#else
230
- a = op->args[i];
231
-#endif
232
- col += ne_fprintf(f, " " TARGET_FMT_lx, a);
233
+ col += ne_fprintf(f, " %016" PRIx64,
234
+ tcg_get_insn_start_param(op, i));
235
}
236
} else if (c == INDEX_op_call) {
237
const TCGHelperInfo *info = tcg_call_info(op);
238
@@ -XXX,XX +XXX,XX @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
239
}
240
num_insns++;
241
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
242
- target_ulong a;
243
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
244
- a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
245
-#else
246
- a = op->args[i];
247
-#endif
248
- s->gen_insn_data[num_insns][i] = a;
249
+ s->gen_insn_data[num_insns][i] =
250
+ tcg_get_insn_start_param(op, i);
38
}
251
}
39
break;
252
break;
40
253
case INDEX_op_discard:
41
- case INDEX_op_mulu2_i32:
42
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
43
- uint32_t a = arg_info(op->args[2])->val;
44
- uint32_t b = arg_info(op->args[3])->val;
45
- uint64_t r = (uint64_t)a * b;
46
- TCGArg rl, rh;
47
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
48
-
49
- rl = op->args[0];
50
- rh = op->args[1];
51
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
52
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
53
- continue;
54
- }
55
- break;
56
-
57
default:
58
break;
59
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
CASE_OP_32_64(muluh):
62
done = fold_mul_highpart(&ctx, op);
63
break;
64
+ case INDEX_op_mulu2_i32:
65
+ done = fold_mulu2_i32(&ctx, op);
66
+ break;
67
CASE_OP_32_64(nand):
68
done = fold_nand(&ctx, op);
69
break;
70
--
254
--
71
2.25.1
255
2.34.1
72
256
73
257
1
Calls are special in that they have a variable number
1
Always pass the target address as uint64_t.
2
of arguments, and need to be able to clobber globals.
2
Adjust tcg_out_{ld,st}_helper_args to match.
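The point above about calls being able to clobber globals is exactly why the fold_call() added in the diff below resets the tracked state of every global unless the call is flagged as neither reading nor writing them. A reduced sketch of that invariant follows; the flag bits and structure names here are invented for illustration and are not the series' actual definitions.

    #include <stdbool.h>

    /* Reduced sketch of the rule described above: unless a call promises not
     * to read or write globals, any cached knowledge about them is stale and
     * must be dropped.  Flags and structures are invented for this note. */
    #define CALL_NO_READ_GLOBALS   (1u << 0)
    #define CALL_NO_WRITE_GLOBALS  (1u << 1)

    struct temp_state { bool is_const; long val; };

    static void forget_globals_across_call(struct temp_state *temps,
                                           int nb_globals, unsigned call_flags)
    {
        if (call_flags & (CALL_NO_READ_GLOBALS | CALL_NO_WRITE_GLOBALS)) {
            return;                     /* call cannot touch guest globals */
        }
        for (int i = 0; i < nb_globals; i++) {
            temps[i].is_const = false;  /* constant-folding info is now stale */
        }
    }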
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 63 ++++++++++++++++++++++++++++++++------------------
7
include/tcg/tcg-ldst.h | 26 +++++++++---------
9
1 file changed, 41 insertions(+), 22 deletions(-)
8
accel/tcg/cputlb.c | 26 +++++++++---------
9
accel/tcg/user-exec.c | 26 +++++++++---------
10
tcg/tcg.c | 62 ++++++++++++++++++++++++++++++++----------
11
4 files changed, 87 insertions(+), 53 deletions(-)
10
12
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
12
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
15
--- a/include/tcg/tcg-ldst.h
14
+++ b/tcg/optimize.c
16
+++ b/include/tcg/tcg-ldst.h
15
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
17
@@ -XXX,XX +XXX,XX @@
18
#define TCG_LDST_H
19
20
/* Value zero-extended to tcg register size. */
21
-tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
22
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
23
MemOpIdx oi, uintptr_t retaddr);
24
-tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
25
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
26
MemOpIdx oi, uintptr_t retaddr);
27
-tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
28
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
29
MemOpIdx oi, uintptr_t retaddr);
30
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
31
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
32
MemOpIdx oi, uintptr_t retaddr);
33
-Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
34
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
35
MemOpIdx oi, uintptr_t retaddr);
36
37
/* Value sign-extended to tcg register size. */
38
-tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
39
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
40
MemOpIdx oi, uintptr_t retaddr);
41
-tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
42
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
43
MemOpIdx oi, uintptr_t retaddr);
44
-tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
45
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
46
MemOpIdx oi, uintptr_t retaddr);
47
48
/*
49
* Value extended to at least uint32_t, so that some ABIs do not require
50
* zero-extension from uint8_t or uint16_t.
51
*/
52
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
53
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
54
MemOpIdx oi, uintptr_t retaddr);
55
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
56
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
57
MemOpIdx oi, uintptr_t retaddr);
58
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
59
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
60
MemOpIdx oi, uintptr_t retaddr);
61
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
62
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
63
MemOpIdx oi, uintptr_t retaddr);
64
-void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
65
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
66
MemOpIdx oi, uintptr_t retaddr);
67
68
#endif /* TCG_LDST_H */
69
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
70
index XXXXXXX..XXXXXXX 100644
71
--- a/accel/tcg/cputlb.c
72
+++ b/accel/tcg/cputlb.c
73
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
74
return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
75
}
76
77
-tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
78
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
79
MemOpIdx oi, uintptr_t retaddr)
80
{
81
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_8);
82
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
83
return ret;
84
}
85
86
-tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
87
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
88
MemOpIdx oi, uintptr_t retaddr)
89
{
90
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
91
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
92
return ret;
93
}
94
95
-tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
96
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
97
MemOpIdx oi, uintptr_t retaddr)
98
{
99
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
100
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
101
return ret;
102
}
103
104
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
105
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
106
MemOpIdx oi, uintptr_t retaddr)
107
{
108
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
109
@@ -XXX,XX +XXX,XX @@ uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
110
* avoid this for 64-bit data, or for 32-bit data on 32-bit host.
111
*/
112
113
-tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
114
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
115
MemOpIdx oi, uintptr_t retaddr)
116
{
117
return (int8_t)helper_ldub_mmu(env, addr, oi, retaddr);
118
}
119
120
-tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
121
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
122
MemOpIdx oi, uintptr_t retaddr)
123
{
124
return (int16_t)helper_lduw_mmu(env, addr, oi, retaddr);
125
}
126
127
-tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
128
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
129
MemOpIdx oi, uintptr_t retaddr)
130
{
131
return (int32_t)helper_ldul_mmu(env, addr, oi, retaddr);
132
@@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_mmu(CPUArchState *env, target_ulong addr,
133
return ret;
134
}
135
136
-Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
137
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
138
uint32_t oi, uintptr_t retaddr)
139
{
140
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
141
@@ -XXX,XX +XXX,XX @@ static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
16
}
142
}
17
}
143
}
18
144
19
+static bool fold_call(OptContext *ctx, TCGOp *op)
145
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
20
+{
146
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
21
+ TCGContext *s = ctx->tcg;
147
MemOpIdx oi, uintptr_t ra)
22
+ int nb_oargs = TCGOP_CALLO(op);
148
{
23
+ int nb_iargs = TCGOP_CALLI(op);
149
MMULookupLocals l;
24
+ int flags, i;
150
@@ -XXX,XX +XXX,XX @@ static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
25
+
151
do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
26
+ init_arguments(ctx, op, nb_oargs + nb_iargs);
152
}
27
+ copy_propagate(ctx, op, nb_oargs, nb_iargs);
153
28
+
154
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
29
+ /* If the function reads or writes globals, reset temp data. */
155
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
30
+ flags = tcg_call_flags(op);
156
MemOpIdx oi, uintptr_t retaddr)
31
+ if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
157
{
32
+ int nb_globals = s->nb_globals;
158
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
33
+
159
@@ -XXX,XX +XXX,XX @@ static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
34
+ for (i = 0; i < nb_globals; i++) {
160
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
35
+ if (test_bit(i, ctx->temps_used.l)) {
161
}
36
+ reset_ts(&ctx->tcg->temps[i]);
162
37
+ }
163
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
38
+ }
164
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
165
MemOpIdx oi, uintptr_t retaddr)
166
{
167
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
168
@@ -XXX,XX +XXX,XX @@ static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
169
(void) do_st_leN(env, &l.page[1], val, l.mmu_idx, l.memop, ra);
170
}
171
172
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
173
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
174
MemOpIdx oi, uintptr_t retaddr)
175
{
176
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
177
@@ -XXX,XX +XXX,XX @@ static void do_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
178
}
179
}
180
181
-void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
182
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
183
MemOpIdx oi, uintptr_t retaddr)
184
{
185
tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
186
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
187
index XXXXXXX..XXXXXXX 100644
188
--- a/accel/tcg/user-exec.c
189
+++ b/accel/tcg/user-exec.c
190
@@ -XXX,XX +XXX,XX @@ static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
191
return ret;
192
}
193
194
-tcg_target_ulong helper_ldub_mmu(CPUArchState *env, target_ulong addr,
195
+tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
196
MemOpIdx oi, uintptr_t ra)
197
{
198
return do_ld1_mmu(env, addr, get_memop(oi), ra);
199
}
200
201
-tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, target_ulong addr,
202
+tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
203
MemOpIdx oi, uintptr_t ra)
204
{
205
return (int8_t)do_ld1_mmu(env, addr, get_memop(oi), ra);
206
@@ -XXX,XX +XXX,XX @@ static uint16_t do_ld2_he_mmu(CPUArchState *env, abi_ptr addr,
207
return ret;
208
}
209
210
-tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
211
+tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
212
MemOpIdx oi, uintptr_t ra)
213
{
214
MemOp mop = get_memop(oi);
215
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_lduw_mmu(CPUArchState *env, target_ulong addr,
216
return ret;
217
}
218
219
-tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, target_ulong addr,
220
+tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
221
MemOpIdx oi, uintptr_t ra)
222
{
223
MemOp mop = get_memop(oi);
224
@@ -XXX,XX +XXX,XX @@ static uint32_t do_ld4_he_mmu(CPUArchState *env, abi_ptr addr,
225
return ret;
226
}
227
228
-tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
229
+tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
230
MemOpIdx oi, uintptr_t ra)
231
{
232
MemOp mop = get_memop(oi);
233
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_ldul_mmu(CPUArchState *env, target_ulong addr,
234
return ret;
235
}
236
237
-tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, target_ulong addr,
238
+tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
239
MemOpIdx oi, uintptr_t ra)
240
{
241
MemOp mop = get_memop(oi);
242
@@ -XXX,XX +XXX,XX @@ static uint64_t do_ld8_he_mmu(CPUArchState *env, abi_ptr addr,
243
return ret;
244
}
245
246
-uint64_t helper_ldq_mmu(CPUArchState *env, target_ulong addr,
247
+uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
248
MemOpIdx oi, uintptr_t ra)
249
{
250
MemOp mop = get_memop(oi);
251
@@ -XXX,XX +XXX,XX @@ static Int128 do_ld16_he_mmu(CPUArchState *env, abi_ptr addr,
252
return ret;
253
}
254
255
-Int128 helper_ld16_mmu(CPUArchState *env, target_ulong addr,
256
+Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
257
MemOpIdx oi, uintptr_t ra)
258
{
259
MemOp mop = get_memop(oi);
260
@@ -XXX,XX +XXX,XX @@ static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
261
clear_helper_retaddr();
262
}
263
264
-void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
265
+void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
266
MemOpIdx oi, uintptr_t ra)
267
{
268
do_st1_mmu(env, addr, val, get_memop(oi), ra);
269
@@ -XXX,XX +XXX,XX @@ static void do_st2_he_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
270
clear_helper_retaddr();
271
}
272
273
-void helper_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
274
+void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
275
MemOpIdx oi, uintptr_t ra)
276
{
277
MemOp mop = get_memop(oi);
278
@@ -XXX,XX +XXX,XX @@ static void do_st4_he_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
279
clear_helper_retaddr();
280
}
281
282
-void helper_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
283
+void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
284
MemOpIdx oi, uintptr_t ra)
285
{
286
MemOp mop = get_memop(oi);
287
@@ -XXX,XX +XXX,XX @@ static void do_st8_he_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
288
clear_helper_retaddr();
289
}
290
291
-void helper_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
292
+void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
293
MemOpIdx oi, uintptr_t ra)
294
{
295
MemOp mop = get_memop(oi);
296
@@ -XXX,XX +XXX,XX @@ static void do_st16_he_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
297
clear_helper_retaddr();
298
}
299
300
-void helper_st16_mmu(CPUArchState *env, target_ulong addr, Int128 val,
301
+void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
302
MemOpIdx oi, uintptr_t ra)
303
{
304
MemOp mop = get_memop(oi);
305
diff --git a/tcg/tcg.c b/tcg/tcg.c
306
index XXXXXXX..XXXXXXX 100644
307
--- a/tcg/tcg.c
308
+++ b/tcg/tcg.c
309
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld32_mmu = {
310
.flags = TCG_CALL_NO_WG,
311
.typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
312
| dh_typemask(env, 1)
313
- | dh_typemask(tl, 2) /* target_ulong addr */
314
+ | dh_typemask(i64, 2) /* uint64_t addr */
315
| dh_typemask(i32, 3) /* unsigned oi */
316
| dh_typemask(ptr, 4) /* uintptr_t ra */
317
};
318
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld64_mmu = {
319
.flags = TCG_CALL_NO_WG,
320
.typemask = dh_typemask(i64, 0) /* return uint64_t */
321
| dh_typemask(env, 1)
322
- | dh_typemask(tl, 2) /* target_ulong addr */
323
+ | dh_typemask(i64, 2) /* uint64_t addr */
324
| dh_typemask(i32, 3) /* unsigned oi */
325
| dh_typemask(ptr, 4) /* uintptr_t ra */
326
};
327
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_ld128_mmu = {
328
.flags = TCG_CALL_NO_WG,
329
.typemask = dh_typemask(i128, 0) /* return Int128 */
330
| dh_typemask(env, 1)
331
- | dh_typemask(tl, 2) /* target_ulong addr */
332
+ | dh_typemask(i64, 2) /* uint64_t addr */
333
| dh_typemask(i32, 3) /* unsigned oi */
334
| dh_typemask(ptr, 4) /* uintptr_t ra */
335
};
336
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st32_mmu = {
337
.flags = TCG_CALL_NO_WG,
338
.typemask = dh_typemask(void, 0)
339
| dh_typemask(env, 1)
340
- | dh_typemask(tl, 2) /* target_ulong addr */
341
+ | dh_typemask(i64, 2) /* uint64_t addr */
342
| dh_typemask(i32, 3) /* uint32_t data */
343
| dh_typemask(i32, 4) /* unsigned oi */
344
| dh_typemask(ptr, 5) /* uintptr_t ra */
345
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st64_mmu = {
346
.flags = TCG_CALL_NO_WG,
347
.typemask = dh_typemask(void, 0)
348
| dh_typemask(env, 1)
349
- | dh_typemask(tl, 2) /* target_ulong addr */
350
+ | dh_typemask(i64, 2) /* uint64_t addr */
351
| dh_typemask(i64, 3) /* uint64_t data */
352
| dh_typemask(i32, 4) /* unsigned oi */
353
| dh_typemask(ptr, 5) /* uintptr_t ra */
354
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo info_helper_st128_mmu = {
355
.flags = TCG_CALL_NO_WG,
356
.typemask = dh_typemask(void, 0)
357
| dh_typemask(env, 1)
358
- | dh_typemask(tl, 2) /* target_ulong addr */
359
+ | dh_typemask(i64, 2) /* uint64_t addr */
360
| dh_typemask(i128, 3) /* Int128 data */
361
| dh_typemask(i32, 4) /* unsigned oi */
362
| dh_typemask(ptr, 5) /* uintptr_t ra */
363
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
364
next_arg = 1;
365
366
loc = &info->in[next_arg];
367
- nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
368
- ldst->addrlo_reg, ldst->addrhi_reg);
369
- next_arg += nmov;
370
+ if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
371
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
372
+ ldst->addrlo_reg, ldst->addrhi_reg);
373
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
374
+ next_arg += nmov;
375
+ } else {
376
+ /*
377
+ * 32-bit host with 32-bit guest: zero-extend the guest address
378
+ * to 64-bits for the helper by storing the low part, then
379
+ * load a zero for the high part.
380
+ */
381
+ tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
382
+ TCG_TYPE_I32, TCG_TYPE_I32,
383
+ ldst->addrlo_reg, -1);
384
+ tcg_out_helper_load_slots(s, 1, mov, parm);
385
386
- tcg_out_helper_load_slots(s, nmov, mov, parm);
387
+ tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
388
+ TCG_TYPE_I32, 0, parm);
389
+ next_arg += 2;
390
+ }
391
392
switch (info->out_kind) {
393
case TCG_CALL_RET_NORMAL:
394
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
395
396
/* Handle addr argument. */
397
loc = &info->in[next_arg];
398
- n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
399
- ldst->addrlo_reg, ldst->addrhi_reg);
400
- next_arg += n;
401
- nmov += n;
402
+ if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
403
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
404
+ ldst->addrlo_reg, ldst->addrhi_reg);
405
+ next_arg += n;
406
+ nmov += n;
407
+ } else {
408
+ /*
409
+ * 32-bit host with 32-bit guest: zero-extend the guest address
410
+ * to 64-bits for the helper by storing the low part. Later,
411
+ * after we have processed the register inputs, we will load a
412
+ * zero for the high part.
413
+ */
414
+ tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
415
+ TCG_TYPE_I32, TCG_TYPE_I32,
416
+ ldst->addrlo_reg, -1);
417
+ next_arg += 2;
418
+ nmov += 1;
419
+ }
420
421
/* Handle data argument. */
422
loc = &info->in[next_arg];
423
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
424
g_assert_not_reached();
425
}
426
427
+ if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32) {
428
+ loc = &info->in[1 + !HOST_BIG_ENDIAN];
429
+ tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
39
+ }
430
+ }
40
+
431
+
41
+ /* Reset temp data for outputs. */
432
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
42
+ for (i = 0; i < nb_oargs; i++) {
433
}
43
+ reset_temp(op->args[i]);
434
44
+ }
45
+
46
+ /* Stop optimizing MB across calls. */
47
+ ctx->prev_mb = NULL;
48
+ return true;
49
+}
50
+
51
/* Propagate constants and copies, fold constant expressions. */
52
void tcg_optimize(TCGContext *s)
53
{
54
- int nb_temps, nb_globals, i;
55
+ int nb_temps, i;
56
TCGOp *op, *op_next;
57
OptContext ctx = { .tcg = s };
58
59
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
60
available through the doubly linked circular list. */
61
62
nb_temps = s->nb_temps;
63
- nb_globals = s->nb_globals;
64
-
65
for (i = 0; i < nb_temps; ++i) {
66
s->temps[i].state_ptr = NULL;
67
}
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
uint64_t z_mask, partmask, affected, tmp;
70
int nb_oargs, nb_iargs;
71
TCGOpcode opc = op->opc;
72
- const TCGOpDef *def = &tcg_op_defs[opc];
73
+ const TCGOpDef *def;
74
75
- /* Count the arguments, and initialize the temps that are
76
- going to be used */
77
+ /* Calls are special. */
78
if (opc == INDEX_op_call) {
79
- nb_oargs = TCGOP_CALLO(op);
80
- nb_iargs = TCGOP_CALLI(op);
81
- } else {
82
- nb_oargs = def->nb_oargs;
83
- nb_iargs = def->nb_iargs;
84
+ fold_call(&ctx, op);
85
+ continue;
86
}
87
+
88
+ def = &tcg_op_defs[opc];
89
+ nb_oargs = def->nb_oargs;
90
+ nb_iargs = def->nb_iargs;
91
init_arguments(&ctx, op, nb_oargs + nb_iargs);
92
copy_propagate(&ctx, op, nb_oargs, nb_iargs);
93
94
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
95
if (def->flags & TCG_OPF_BB_END) {
96
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
97
} else {
98
- if (opc == INDEX_op_call &&
99
- !(tcg_call_flags(op)
100
- & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
101
- for (i = 0; i < nb_globals; i++) {
102
- if (test_bit(i, ctx.temps_used.l)) {
103
- reset_ts(&s->temps[i]);
104
- }
105
- }
106
- }
107
-
108
for (i = 0; i < nb_oargs; i++) {
109
reset_temp(op->args[i]);
110
/* Save the corresponding known-zero bits mask for the
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
case INDEX_op_qemu_st_i32:
113
case INDEX_op_qemu_st8_i32:
114
case INDEX_op_qemu_st_i64:
115
- case INDEX_op_call:
116
/* Opcodes that touch guest memory stop the optimization. */
117
ctx.prev_mb = NULL;
118
break;
119
--
435
--
120
2.25.1
436
2.34.1
121
437
122
438
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Always pass the target address as uint64_t.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
---
5
tcg/optimize.c | 48 ++++++++++++++++++++++++++++++------------------
6
accel/tcg/tcg-runtime.h | 4 ++--
6
1 file changed, 30 insertions(+), 18 deletions(-)
7
accel/tcg/cputlb.c | 5 ++---
8
accel/tcg/user-exec.c | 5 ++---
9
tcg/tcg-op-ldst.c | 26 ++++++++++++++++++++++++--
10
4 files changed, 30 insertions(+), 10 deletions(-)
7
11
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
9
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
14
--- a/accel/tcg/tcg-runtime.h
11
+++ b/tcg/optimize.c
15
+++ b/accel/tcg/tcg-runtime.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
13
return fold_const2(ctx, op);
17
DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
18
#endif /* IN_HELPER_PROTO */
19
20
-DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, tl, i32)
21
-DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, tl, i128, i32)
22
+DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, i64, i32)
23
+DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32)
24
25
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
26
i32, env, tl, i32, i32, i32)
27
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/accel/tcg/cputlb.c
30
+++ b/accel/tcg/cputlb.c
31
@@ -XXX,XX +XXX,XX @@ Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
32
return do_ld16_mmu(env, addr, oi, retaddr);
14
}
33
}
15
34
16
+static bool fold_extract(OptContext *ctx, TCGOp *op)
35
-Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, uint32_t oi)
36
+Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, uint32_t oi)
37
{
38
return helper_ld16_mmu(env, addr, oi, GETPC());
39
}
40
@@ -XXX,XX +XXX,XX @@ void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
41
do_st16_mmu(env, addr, val, oi, retaddr);
42
}
43
44
-void helper_st_i128(CPUArchState *env, target_ulong addr, Int128 val,
45
- MemOpIdx oi)
46
+void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
47
{
48
helper_st16_mmu(env, addr, val, oi, GETPC());
49
}
50
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/accel/tcg/user-exec.c
53
+++ b/accel/tcg/user-exec.c
54
@@ -XXX,XX +XXX,XX @@ Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
55
return ret;
56
}
57
58
-Int128 helper_ld_i128(CPUArchState *env, target_ulong addr, MemOpIdx oi)
59
+Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, MemOpIdx oi)
60
{
61
return helper_ld16_mmu(env, addr, oi, GETPC());
62
}
63
@@ -XXX,XX +XXX,XX @@ void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
64
do_st16_he_mmu(env, addr, val, mop, ra);
65
}
66
67
-void helper_st_i128(CPUArchState *env, target_ulong addr,
68
- Int128 val, MemOpIdx oi)
69
+void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
70
{
71
helper_st16_mmu(env, addr, val, oi, GETPC());
72
}
73
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/tcg/tcg-op-ldst.c
76
+++ b/tcg/tcg-op-ldst.c
77
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
78
#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
79
#endif
80
81
+static TCGv_i64 maybe_extend_addr64(TCGv addr)
17
+{
82
+{
18
+ if (arg_is_const(op->args[1])) {
83
+#if TARGET_LONG_BITS == 32
19
+ uint64_t t;
84
+ TCGv_i64 a64 = tcg_temp_ebb_new_i64();
20
+
85
+ tcg_gen_extu_i32_i64(a64, addr);
21
+ t = arg_info(op->args[1])->val;
86
+ return a64;
22
+ t = extract64(t, op->args[2], op->args[3]);
87
+#else
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
88
+ return addr;
24
+ }
89
+#endif
25
+ return false;
26
+}
90
+}
27
+
91
+
28
static bool fold_extract2(OptContext *ctx, TCGOp *op)
92
+static void maybe_free_addr64(TCGv_i64 a64)
29
{
30
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
31
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
32
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
33
}
34
35
+static bool fold_sextract(OptContext *ctx, TCGOp *op)
36
+{
93
+{
37
+ if (arg_is_const(op->args[1])) {
94
+#if TARGET_LONG_BITS == 32
38
+ uint64_t t;
95
+ tcg_temp_free_i64(a64);
39
+
96
+#endif
40
+ t = arg_info(op->args[1])->val;
41
+ t = sextract64(t, op->args[2], op->args[3]);
42
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
43
+ }
44
+ return false;
45
+}
97
+}
46
+
98
+
47
static bool fold_shift(OptContext *ctx, TCGOp *op)
99
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
48
{
100
{
49
return fold_const2(ctx, op);
101
const MemOpIdx oi = make_memop_idx(memop, idx);
50
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
102
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
51
}
103
tcg_gen_bswap64_i64(y, y);
52
break;
104
}
53
105
} else {
54
- CASE_OP_32_64(extract):
106
- gen_helper_ld_i128(val, cpu_env, addr, tcg_constant_i32(oi));
55
- if (arg_is_const(op->args[1])) {
107
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
56
- tmp = extract64(arg_info(op->args[1])->val,
108
+ gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
57
- op->args[2], op->args[3]);
109
+ maybe_free_addr64(a64);
58
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
110
}
59
- continue;
111
60
- }
112
plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
61
- break;
113
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
62
-
114
}
63
- CASE_OP_32_64(sextract):
115
tcg_temp_free(addr_p8);
64
- if (arg_is_const(op->args[1])) {
116
} else {
65
- tmp = sextract64(arg_info(op->args[1])->val,
117
- gen_helper_st_i128(cpu_env, addr, val, tcg_constant_i32(oi));
66
- op->args[2], op->args[3]);
118
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
67
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
119
+ gen_helper_st_i128(cpu_env, a64, val, tcg_constant_i32(oi));
68
- continue;
120
+ maybe_free_addr64(a64);
69
- }
121
}
70
- break;
122
71
-
123
plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
72
default:
73
break;
74
75
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
76
CASE_OP_32_64(eqv):
77
done = fold_eqv(&ctx, op);
78
break;
79
+ CASE_OP_32_64(extract):
80
+ done = fold_extract(&ctx, op);
81
+ break;
82
CASE_OP_32_64(extract2):
83
done = fold_extract2(&ctx, op);
84
break;
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
case INDEX_op_setcond2_i32:
87
done = fold_setcond2(&ctx, op);
88
break;
89
+ CASE_OP_32_64(sextract):
90
+ done = fold_sextract(&ctx, op);
91
+ break;
92
CASE_OP_32_64_VEC(sub):
93
done = fold_sub(&ctx, op);
94
break;
95
--
124
--
96
2.25.1
125
2.34.1
97
126
98
127
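For reference, the two address-widening helpers added to tcg/tcg-op-ldst.c above amount to the following (a sketch consolidated from the hunks; all names are taken from the patch itself):

    static TCGv_i64 maybe_extend_addr64(TCGv addr)
    {
    #if TARGET_LONG_BITS == 32
        /* 32-bit guest: zero-extend the address into a fresh i64 temp. */
        TCGv_i64 a64 = tcg_temp_ebb_new_i64();
        tcg_gen_extu_i32_i64(a64, addr);
        return a64;
    #else
        /* 64-bit guest: TCGv is already a TCGv_i64, pass it straight through. */
        return addr;
    #endif
    }

    static void maybe_free_addr64(TCGv_i64 a64)
    {
    #if TARGET_LONG_BITS == 32
        /* Only the 32-bit case allocated a temporary above. */
        tcg_temp_free_i64(a64);
    #endif
    }

Call sites bracket the helper invocation with this pair, so 64-bit guests see no extra temporaries.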
1
Continue splitting tcg_optimize.
1
Always pass the target address as uint64_t.
2
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
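Every atomic expander then reaches the retyped (i64-address) helpers through the same widen/call/free bracket; a sketch of the call-site shape, taken from the hunks below:

    /* 'gen' is the helper already selected for this MemOp. */
    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
    maybe_free_addr64(a64);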
7
---
5
---
8
tcg/optimize.c | 22 ++++++++++++++--------
6
accel/tcg/tcg-runtime.h | 46 +++++++++++++++++------------------
9
1 file changed, 14 insertions(+), 8 deletions(-)
7
tcg/tcg-op-ldst.c | 38 ++++++++++++++++++++---------
10
8
accel/tcg/atomic_common.c.inc | 14 +++++------
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
3 files changed, 57 insertions(+), 41 deletions(-)
10
11
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/accel/tcg/tcg-runtime.h
14
+++ b/tcg/optimize.c
14
+++ b/accel/tcg/tcg-runtime.h
15
@@ -XXX,XX +XXX,XX @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
15
@@ -XXX,XX +XXX,XX @@ DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, i64, i32)
16
DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32)
17
18
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
19
- i32, env, tl, i32, i32, i32)
20
+ i32, env, i64, i32, i32, i32)
21
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
22
- i32, env, tl, i32, i32, i32)
23
+ i32, env, i64, i32, i32, i32)
24
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_le, TCG_CALL_NO_WG,
25
- i32, env, tl, i32, i32, i32)
26
+ i32, env, i64, i32, i32, i32)
27
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_be, TCG_CALL_NO_WG,
28
- i32, env, tl, i32, i32, i32)
29
+ i32, env, i64, i32, i32, i32)
30
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_le, TCG_CALL_NO_WG,
31
- i32, env, tl, i32, i32, i32)
32
+ i32, env, i64, i32, i32, i32)
33
#ifdef CONFIG_ATOMIC64
34
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
35
- i64, env, tl, i64, i64, i32)
36
+ i64, env, i64, i64, i64, i32)
37
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
38
- i64, env, tl, i64, i64, i32)
39
+ i64, env, i64, i64, i64, i32)
40
#endif
41
#ifdef CONFIG_CMPXCHG128
42
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG,
43
- i128, env, tl, i128, i128, i32)
44
+ i128, env, i64, i128, i128, i32)
45
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG,
46
- i128, env, tl, i128, i128, i32)
47
+ i128, env, i64, i128, i128, i32)
48
#endif
49
50
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_be, TCG_CALL_NO_WG,
51
- i128, env, tl, i128, i128, i32)
52
+ i128, env, i64, i128, i128, i32)
53
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_le, TCG_CALL_NO_WG,
54
- i128, env, tl, i128, i128, i32)
55
+ i128, env, i64, i128, i128, i32)
56
57
#ifdef CONFIG_ATOMIC64
58
#define GEN_ATOMIC_HELPERS(NAME) \
59
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
60
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
61
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
62
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
63
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
64
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
65
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
66
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
67
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
68
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
69
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
70
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
71
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
72
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
73
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
74
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_le), \
75
- TCG_CALL_NO_WG, i64, env, tl, i64, i32) \
76
+ TCG_CALL_NO_WG, i64, env, i64, i64, i32) \
77
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_be), \
78
- TCG_CALL_NO_WG, i64, env, tl, i64, i32)
79
+ TCG_CALL_NO_WG, i64, env, i64, i64, i32)
80
#else
81
#define GEN_ATOMIC_HELPERS(NAME) \
82
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
83
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
84
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
85
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
86
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
87
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
88
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
89
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
90
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
91
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
92
- TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
93
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
94
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
95
- TCG_CALL_NO_WG, i32, env, tl, i32, i32)
96
+ TCG_CALL_NO_WG, i32, env, i64, i32, i32)
97
#endif /* CONFIG_ATOMIC64 */
98
99
GEN_ATOMIC_HELPERS(fetch_add)
100
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/tcg/tcg-op-ldst.c
103
+++ b/tcg/tcg-op-ldst.c
104
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
16
}
105
}
17
}
106
}
18
107
19
+static void copy_propagate(OptContext *ctx, TCGOp *op,
108
-typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv,
20
+ int nb_oargs, int nb_iargs)
109
+typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
21
+{
110
TCGv_i32, TCGv_i32, TCGv_i32);
22
+ TCGContext *s = ctx->tcg;
111
-typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv,
23
+
112
+typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
24
+ for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
113
TCGv_i64, TCGv_i64, TCGv_i32);
25
+ TCGTemp *ts = arg_temp(op->args[i]);
114
-typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv,
26
+ if (ts && ts_is_copy(ts)) {
115
+typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
27
+ op->args[i] = temp_arg(find_better_copy(s, ts));
116
TCGv_i128, TCGv_i128, TCGv_i32);
28
+ }
117
-typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv,
29
+ }
118
+typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
30
+}
119
TCGv_i32, TCGv_i32);
31
+
120
-typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv,
32
/* Propagate constants and copies, fold constant expressions. */
121
+typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
33
void tcg_optimize(TCGContext *s)
122
TCGv_i64, TCGv_i32);
34
{
123
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
124
#ifdef CONFIG_ATOMIC64
36
nb_iargs = def->nb_iargs;
125
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
126
TCGv_i32 newv, TCGArg idx, MemOp memop)
127
{
128
gen_atomic_cx_i32 gen;
129
+ TCGv_i64 a64;
130
MemOpIdx oi;
131
132
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
133
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
134
tcg_debug_assert(gen != NULL);
135
136
oi = make_memop_idx(memop & ~MO_SIGN, idx);
137
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
138
+ a64 = maybe_extend_addr64(addr);
139
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
140
+ maybe_free_addr64(a64);
141
142
if (memop & MO_SIGN) {
143
tcg_gen_ext_i32(retv, retv, memop);
144
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
145
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
146
if (gen) {
147
MemOpIdx oi = make_memop_idx(memop, idx);
148
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
149
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
150
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
151
+ maybe_free_addr64(a64);
152
return;
37
}
153
}
38
init_arguments(&ctx, op, nb_oargs + nb_iargs);
154
39
-
155
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
40
- /* Do copy propagation */
156
? gen_helper_nonatomic_cmpxchgo_le
41
- for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
157
: gen_helper_nonatomic_cmpxchgo_be);
42
- TCGTemp *ts = arg_temp(op->args[i]);
158
MemOpIdx oi = make_memop_idx(memop, idx);
43
- if (ts && ts_is_copy(ts)) {
159
+ TCGv_i64 a64;
44
- op->args[i] = temp_arg(find_better_copy(s, ts));
160
45
- }
161
tcg_debug_assert((memop & MO_SIZE) == MO_128);
46
- }
162
tcg_debug_assert((memop & MO_SIGN) == 0);
47
+ copy_propagate(&ctx, op, nb_oargs, nb_iargs);
163
48
164
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
49
/* For commutative operations make constant second argument */
165
+ a64 = maybe_extend_addr64(addr);
50
switch (opc) {
166
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
167
+ maybe_free_addr64(a64);
168
} else {
169
TCGv_i128 oldv = tcg_temp_ebb_new_i128();
170
TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
171
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
172
173
if (gen) {
174
MemOpIdx oi = make_memop_idx(memop, idx);
175
- gen(retv, cpu_env, addr, cmpv, newv, tcg_constant_i32(oi));
176
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
177
+ gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
178
+ maybe_free_addr64(a64);
179
return;
180
}
181
182
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
183
TCGArg idx, MemOp memop, void * const table[])
184
{
185
gen_atomic_op_i32 gen;
186
+ TCGv_i64 a64;
187
MemOpIdx oi;
188
189
memop = tcg_canonicalize_memop(memop, 0, 0);
190
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
191
tcg_debug_assert(gen != NULL);
192
193
oi = make_memop_idx(memop & ~MO_SIGN, idx);
194
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
195
+ a64 = maybe_extend_addr64(addr);
196
+ gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
197
+ maybe_free_addr64(a64);
198
199
if (memop & MO_SIGN) {
200
tcg_gen_ext_i32(ret, ret, memop);
201
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
202
if ((memop & MO_SIZE) == MO_64) {
203
#ifdef CONFIG_ATOMIC64
204
gen_atomic_op_i64 gen;
205
+ TCGv_i64 a64;
206
MemOpIdx oi;
207
208
gen = table[memop & (MO_SIZE | MO_BSWAP)];
209
tcg_debug_assert(gen != NULL);
210
211
oi = make_memop_idx(memop & ~MO_SIGN, idx);
212
- gen(ret, cpu_env, addr, val, tcg_constant_i32(oi));
213
+ a64 = maybe_extend_addr64(addr);
214
+ gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
215
+ maybe_free_addr64(a64);
216
#else
217
gen_helper_exit_atomic(cpu_env);
218
/* Produce a result, so that we have a well-formed opcode stream
219
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
220
index XXXXXXX..XXXXXXX 100644
221
--- a/accel/tcg/atomic_common.c.inc
222
+++ b/accel/tcg/atomic_common.c.inc
223
@@ -XXX,XX +XXX,XX @@
224
* See the COPYING file in the top-level directory.
225
*/
226
227
-static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr,
228
+static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
229
MemOpIdx oi)
230
{
231
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW);
232
}
233
234
#if HAVE_ATOMIC128
235
-static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr,
236
+static void atomic_trace_ld_post(CPUArchState *env, uint64_t addr,
237
MemOpIdx oi)
238
{
239
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
240
}
241
242
-static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
243
+static void atomic_trace_st_post(CPUArchState *env, uint64_t addr,
244
MemOpIdx oi)
245
{
246
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
247
@@ -XXX,XX +XXX,XX @@ static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
248
*/
249
250
#define CMPXCHG_HELPER(OP, TYPE) \
251
- TYPE HELPER(atomic_##OP)(CPUArchState *env, target_ulong addr, \
252
+ TYPE HELPER(atomic_##OP)(CPUArchState *env, uint64_t addr, \
253
TYPE oldv, TYPE newv, uint32_t oi) \
254
{ return cpu_atomic_##OP##_mmu(env, addr, oldv, newv, oi, GETPC()); }
255
256
@@ -XXX,XX +XXX,XX @@ CMPXCHG_HELPER(cmpxchgo_le, Int128)
257
258
#undef CMPXCHG_HELPER
259
260
-Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
261
+Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, uint64_t addr,
262
Int128 cmpv, Int128 newv, uint32_t oi)
263
{
264
#if TCG_TARGET_REG_BITS == 32
265
@@ -XXX,XX +XXX,XX @@ Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
266
#endif
267
}
268
269
-Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
270
+Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, uint64_t addr,
271
Int128 cmpv, Int128 newv, uint32_t oi)
272
{
273
#if TCG_TARGET_REG_BITS == 32
274
@@ -XXX,XX +XXX,XX @@ Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
275
}
276
277
#define ATOMIC_HELPER(OP, TYPE) \
278
- TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, target_ulong addr, \
279
+ TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, uint64_t addr, \
280
TYPE val, uint32_t oi) \
281
{ return glue(glue(cpu_atomic_,OP),_mmu)(env, addr, val, oi, GETPC()); }
282
51
--
283
--
52
2.25.1
284
2.34.1
53
285
54
286
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
3
---
6
tcg/optimize.c | 9 ++++++---
4
include/tcg/tcg.h | 2 +-
7
1 file changed, 6 insertions(+), 3 deletions(-)
5
tcg/tcg.c | 2 +-
6
2 files changed, 2 insertions(+), 2 deletions(-)
8
7
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
10
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
10
--- a/include/tcg/tcg.h
12
+++ b/tcg/optimize.c
11
+++ b/include/tcg/tcg.h
13
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
12
@@ -XXX,XX +XXX,XX @@ void tcg_register_thread(void);
14
uint64_t z_mask, partmask, affected, tmp;
13
void tcg_prologue_init(TCGContext *s);
15
TCGOpcode opc = op->opc;
14
void tcg_func_start(TCGContext *s);
16
const TCGOpDef *def;
15
17
+ bool done = false;
16
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start);
18
17
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start);
19
/* Calls are special. */
18
20
if (opc == INDEX_op_call) {
19
void tb_target_set_jmp_target(const TranslationBlock *, int,
21
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
20
uintptr_t, uintptr_t);
22
allocator where needed and possible. Also detect copies. */
21
diff --git a/tcg/tcg.c b/tcg/tcg.c
23
switch (opc) {
22
index XXXXXXX..XXXXXXX 100644
24
CASE_OP_32_64_VEC(mov):
23
--- a/tcg/tcg.c
25
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
24
+++ b/tcg/tcg.c
26
- continue;
25
@@ -XXX,XX +XXX,XX @@ int64_t tcg_cpu_exec_time(void)
27
+ done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
26
#endif
28
+ break;
27
29
28
30
case INDEX_op_dup_vec:
29
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
31
if (arg_is_const(op->args[1])) {
30
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
32
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
31
{
33
break;
32
#ifdef CONFIG_PROFILER
34
}
33
TCGProfile *prof = &s->prof;
35
36
- finish_folding(&ctx, op);
37
+ if (!done) {
38
+ finish_folding(&ctx, op);
39
+ }
40
41
/* Eliminate duplicate and redundant fence instructions. */
42
if (ctx.prev_mb) {
43
--
34
--
44
2.25.1
35
2.34.1
45
36
46
37
1
Return -1 instead of 2 for failure, so that we can
1
As gen_mem_wrapped is only used in plugin_gen_empty_mem_callback,
2
use comparisons against 0 for all cases.
2
we can avoid the curiosity of union mem_gen_fn by inlining it.
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
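With -1 for "cannot simplify", the tcg/optimize.c callers can use plain signed comparisons instead of matching a magic 2; a sketch of the brcond arm after the change, consolidated from the hunk below:

    CASE_OP_32_64(brcond):
        i = do_constant_folding_cond(opc, op->args[0], op->args[1], op->args[2]);
        if (i == 0) {            /* condition known false: branch never taken */
            tcg_op_remove(s, op);
            continue;
        } else if (i > 0) {      /* condition known true: becomes a plain br */
            memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
            op->opc = opc = INDEX_op_br;
            op->args[0] = op->args[3];
        }
        /* i < 0: not constant-foldable, keep the brcond. */
        break;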
7
---
6
---
8
tcg/optimize.c | 145 +++++++++++++++++++++++++------------------------
7
accel/tcg/plugin-gen.c | 30 ++++++------------------------
9
1 file changed, 74 insertions(+), 71 deletions(-)
8
1 file changed, 6 insertions(+), 24 deletions(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/accel/tcg/plugin-gen.c
14
+++ b/tcg/optimize.c
13
+++ b/accel/tcg/plugin-gen.c
15
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
14
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_empty_callback(enum plugin_gen_from from)
16
}
15
}
17
}
16
}
18
17
19
-/* Return 2 if the condition can't be simplified, and the result
18
-union mem_gen_fn {
20
- of the condition (0 or 1) if it can */
19
- void (*mem_fn)(TCGv, uint32_t);
21
-static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
20
- void (*inline_fn)(void);
22
- TCGArg y, TCGCond c)
21
-};
23
+/*
22
-
24
+ * Return -1 if the condition can't be simplified,
23
-static void gen_mem_wrapped(enum plugin_gen_cb type,
25
+ * and the result of the condition (0 or 1) if it can.
24
- const union mem_gen_fn *f, TCGv addr,
26
+ */
25
- uint32_t info, bool is_mem)
27
+static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
26
+void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
28
+ TCGArg y, TCGCond c)
29
{
27
{
30
uint64_t xv = arg_info(x)->val;
28
enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
31
uint64_t yv = arg_info(y)->val;
29
32
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
30
- gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, rw);
33
case TCG_COND_GEU:
31
- if (is_mem) {
34
return 1;
32
- f->mem_fn(addr, info);
35
default:
33
- } else {
36
- return 2;
34
- f->inline_fn();
37
+ return -1;
35
- }
38
}
36
+ gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, PLUGIN_GEN_CB_MEM, rw);
39
}
37
+ gen_empty_mem_cb(addr, info);
40
- return 2;
38
tcg_gen_plugin_cb_end();
41
+ return -1;
39
-}
40
41
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
42
-{
43
- union mem_gen_fn fn;
44
-
45
- fn.mem_fn = gen_empty_mem_cb;
46
- gen_mem_wrapped(PLUGIN_GEN_CB_MEM, &fn, addr, info, true);
47
-
48
- fn.inline_fn = gen_empty_inline_cb;
49
- gen_mem_wrapped(PLUGIN_GEN_CB_INLINE, &fn, 0, info, false);
50
+ gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, PLUGIN_GEN_CB_INLINE, rw);
51
+ gen_empty_inline_cb();
52
+ tcg_gen_plugin_cb_end();
42
}
53
}
43
54
44
-/* Return 2 if the condition can't be simplified, and the result
55
static TCGOp *find_op(TCGOp *op, TCGOpcode opc)
45
- of the condition (0 or 1) if it can */
46
-static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
47
+/*
48
+ * Return -1 if the condition can't be simplified,
49
+ * and the result of the condition (0 or 1) if it can.
50
+ */
51
+static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
52
{
53
TCGArg al = p1[0], ah = p1[1];
54
TCGArg bl = p2[0], bh = p2[1];
55
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
56
if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
57
return do_constant_folding_cond_eq(c);
58
}
59
- return 2;
60
+ return -1;
61
}
62
63
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
65
break;
66
67
CASE_OP_32_64(setcond):
68
- tmp = do_constant_folding_cond(opc, op->args[1],
69
- op->args[2], op->args[3]);
70
- if (tmp != 2) {
71
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
72
+ i = do_constant_folding_cond(opc, op->args[1],
73
+ op->args[2], op->args[3]);
74
+ if (i >= 0) {
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], i);
76
continue;
77
}
78
break;
79
80
CASE_OP_32_64(brcond):
81
- tmp = do_constant_folding_cond(opc, op->args[0],
82
- op->args[1], op->args[2]);
83
- switch (tmp) {
84
- case 0:
85
+ i = do_constant_folding_cond(opc, op->args[0],
86
+ op->args[1], op->args[2]);
87
+ if (i == 0) {
88
tcg_op_remove(s, op);
89
continue;
90
- case 1:
91
+ } else if (i > 0) {
92
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
93
op->opc = opc = INDEX_op_br;
94
op->args[0] = op->args[3];
95
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
96
break;
97
98
CASE_OP_32_64(movcond):
99
- tmp = do_constant_folding_cond(opc, op->args[1],
100
- op->args[2], op->args[5]);
101
- if (tmp != 2) {
102
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
103
+ i = do_constant_folding_cond(opc, op->args[1],
104
+ op->args[2], op->args[5]);
105
+ if (i >= 0) {
106
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
107
continue;
108
}
109
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
break;
112
113
case INDEX_op_brcond2_i32:
114
- tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
115
- op->args[4]);
116
- if (tmp == 0) {
117
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2],
118
+ op->args[4]);
119
+ if (i == 0) {
120
do_brcond_false:
121
tcg_op_remove(s, op);
122
continue;
123
}
124
- if (tmp == 1) {
125
+ if (i > 0) {
126
do_brcond_true:
127
op->opc = opc = INDEX_op_br;
128
op->args[0] = op->args[5];
129
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
130
if (op->args[4] == TCG_COND_EQ) {
131
/* Simplify EQ comparisons where one of the pairs
132
can be simplified. */
133
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
134
- op->args[0], op->args[2],
135
- TCG_COND_EQ);
136
- if (tmp == 0) {
137
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
138
+ op->args[0], op->args[2],
139
+ TCG_COND_EQ);
140
+ if (i == 0) {
141
goto do_brcond_false;
142
- } else if (tmp == 1) {
143
+ } else if (i > 0) {
144
goto do_brcond_high;
145
}
146
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
147
- op->args[1], op->args[3],
148
- TCG_COND_EQ);
149
- if (tmp == 0) {
150
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
151
+ op->args[1], op->args[3],
152
+ TCG_COND_EQ);
153
+ if (i == 0) {
154
goto do_brcond_false;
155
- } else if (tmp != 1) {
156
+ } else if (i < 0) {
157
break;
158
}
159
do_brcond_low:
160
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
161
if (op->args[4] == TCG_COND_NE) {
162
/* Simplify NE comparisons where one of the pairs
163
can be simplified. */
164
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
165
- op->args[0], op->args[2],
166
- TCG_COND_NE);
167
- if (tmp == 0) {
168
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
169
+ op->args[0], op->args[2],
170
+ TCG_COND_NE);
171
+ if (i == 0) {
172
goto do_brcond_high;
173
- } else if (tmp == 1) {
174
+ } else if (i > 0) {
175
goto do_brcond_true;
176
}
177
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
178
- op->args[1], op->args[3],
179
- TCG_COND_NE);
180
- if (tmp == 0) {
181
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
182
+ op->args[1], op->args[3],
183
+ TCG_COND_NE);
184
+ if (i == 0) {
185
goto do_brcond_low;
186
- } else if (tmp == 1) {
187
+ } else if (i > 0) {
188
goto do_brcond_true;
189
}
190
}
191
break;
192
193
case INDEX_op_setcond2_i32:
194
- tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
195
- op->args[5]);
196
- if (tmp != 2) {
197
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3],
198
+ op->args[5]);
199
+ if (i >= 0) {
200
do_setcond_const:
201
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
202
+ tcg_opt_gen_movi(&ctx, op, op->args[0], i);
203
continue;
204
}
205
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
206
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
207
if (op->args[5] == TCG_COND_EQ) {
208
/* Simplify EQ comparisons where one of the pairs
209
can be simplified. */
210
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
211
- op->args[1], op->args[3],
212
- TCG_COND_EQ);
213
- if (tmp == 0) {
214
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
215
+ op->args[1], op->args[3],
216
+ TCG_COND_EQ);
217
+ if (i == 0) {
218
goto do_setcond_const;
219
- } else if (tmp == 1) {
220
+ } else if (i > 0) {
221
goto do_setcond_high;
222
}
223
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
224
- op->args[2], op->args[4],
225
- TCG_COND_EQ);
226
- if (tmp == 0) {
227
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
228
+ op->args[2], op->args[4],
229
+ TCG_COND_EQ);
230
+ if (i == 0) {
231
goto do_setcond_high;
232
- } else if (tmp != 1) {
233
+ } else if (i < 0) {
234
break;
235
}
236
do_setcond_low:
237
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
238
if (op->args[5] == TCG_COND_NE) {
239
/* Simplify NE comparisons where one of the pairs
240
can be simplified. */
241
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
242
- op->args[1], op->args[3],
243
- TCG_COND_NE);
244
- if (tmp == 0) {
245
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
246
+ op->args[1], op->args[3],
247
+ TCG_COND_NE);
248
+ if (i == 0) {
249
goto do_setcond_high;
250
- } else if (tmp == 1) {
251
+ } else if (i > 0) {
252
goto do_setcond_const;
253
}
254
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
255
- op->args[2], op->args[4],
256
- TCG_COND_NE);
257
- if (tmp == 0) {
258
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
259
+ op->args[2], op->args[4],
260
+ TCG_COND_NE);
261
+ if (i == 0) {
262
goto do_setcond_low;
263
- } else if (tmp == 1) {
264
+ } else if (i > 0) {
265
goto do_setcond_const;
266
}
267
}
268
--
56
--
269
2.25.1
57
2.34.1
270
58
271
59
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
As do_gen_mem_cb is called once, merge it into gen_empty_mem_cb.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
---
5
tcg/optimize.c | 23 ++++++++++++++---------
6
accel/tcg/plugin-gen.c | 39 +++++++++++++++++----------------------
6
1 file changed, 14 insertions(+), 9 deletions(-)
7
1 file changed, 17 insertions(+), 22 deletions(-)
7
8
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
9
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
--- a/accel/tcg/plugin-gen.c
11
+++ b/tcg/optimize.c
12
+++ b/accel/tcg/plugin-gen.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@ void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index,
13
return fold_const2(ctx, op);
14
void *userdata)
15
{ }
16
17
-static void do_gen_mem_cb(TCGv vaddr, uint32_t info)
18
-{
19
- TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
20
- TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
21
- TCGv_i64 vaddr64 = tcg_temp_ebb_new_i64();
22
- TCGv_ptr udata = tcg_temp_ebb_new_ptr();
23
-
24
- tcg_gen_movi_i32(meminfo, info);
25
- tcg_gen_movi_ptr(udata, 0);
26
- tcg_gen_ld_i32(cpu_index, cpu_env,
27
- -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
28
- tcg_gen_extu_tl_i64(vaddr64, vaddr);
29
-
30
- gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, vaddr64, udata);
31
-
32
- tcg_temp_free_ptr(udata);
33
- tcg_temp_free_i64(vaddr64);
34
- tcg_temp_free_i32(meminfo);
35
- tcg_temp_free_i32(cpu_index);
36
-}
37
-
38
static void gen_empty_udata_cb(void)
39
{
40
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
41
@@ -XXX,XX +XXX,XX @@ static void gen_empty_inline_cb(void)
42
43
static void gen_empty_mem_cb(TCGv addr, uint32_t info)
44
{
45
- do_gen_mem_cb(addr, info);
46
+ TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
47
+ TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
48
+ TCGv_i64 addr64 = tcg_temp_ebb_new_i64();
49
+ TCGv_ptr udata = tcg_temp_ebb_new_ptr();
50
+
51
+ tcg_gen_movi_i32(meminfo, info);
52
+ tcg_gen_movi_ptr(udata, 0);
53
+ tcg_gen_ld_i32(cpu_index, cpu_env,
54
+ -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
55
+ tcg_gen_extu_tl_i64(addr64, addr);
56
+
57
+ gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr64, udata);
58
+
59
+ tcg_temp_free_ptr(udata);
60
+ tcg_temp_free_i64(addr64);
61
+ tcg_temp_free_i32(meminfo);
62
+ tcg_temp_free_i32(cpu_index);
14
}
63
}
15
64
16
+static bool fold_setcond(OptContext *ctx, TCGOp *op)
65
/*
17
+{
18
+ TCGCond cond = op->args[3];
19
+ int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
20
+
21
+ if (i >= 0) {
22
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
23
+ }
24
+ return false;
25
+}
26
+
27
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
28
{
29
TCGCond cond = op->args[5];
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
31
}
32
break;
33
34
- CASE_OP_32_64(setcond):
35
- i = do_constant_folding_cond(opc, op->args[1],
36
- op->args[2], op->args[3]);
37
- if (i >= 0) {
38
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
39
- continue;
40
- }
41
- break;
42
-
43
CASE_OP_32_64(movcond):
44
i = do_constant_folding_cond(opc, op->args[1],
45
op->args[2], op->args[5]);
46
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
47
CASE_OP_32_64(shr):
48
done = fold_shift(&ctx, op);
49
break;
50
+ CASE_OP_32_64(setcond):
51
+ done = fold_setcond(&ctx, op);
52
+ break;
53
case INDEX_op_setcond2_i32:
54
done = fold_setcond2(&ctx, op);
55
break;
56
--
66
--
57
2.25.1
67
2.34.1
58
68
59
69
1
Sign repetitions are perforce all identical, whether they are 1 or 0.
1
We only need to make copies for loads when the destination
2
Bitwise operations preserve at least as many repetitions as the operand with the fewest of them.
2
overlaps the address. For now, only eliminate the copy for
3
stores and 128-bit loads.
4
5
Rename plugin_prep_mem_callbacks to plugin_maybe_preserve_addr,
6
returning NULL if no copy is made.
3
7
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
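As a standalone illustration of the sign-repetition rule above (plain C, not QEMU code; __builtin_clrsbll is the GCC/clang builtin that counts redundant sign bits):

    #include <stdint.h>
    #include <stdio.h>

    /* Redundant sign bits: how many bits below the msb repeat it. */
    static int sign_reps(int64_t x)
    {
        return __builtin_clrsbll(x);
    }

    int main(void)
    {
        int64_t a = (int64_t)0xffffffffffff1234ull;  /* 47 repetitions */
        int64_t b = 0x0f0f;                          /* 51 repetitions */

        /* Each bitwise combination keeps at least min(47, 51) repetitions. */
        printf("and=%d or=%d xor=%d\n",
               sign_reps(a & b), sign_reps(a | b), sign_reps(a ^ b));
        return 0;
    }

which is why the fold functions can take the intersection of the two operands' s_mask.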
8
---
10
---
9
tcg/optimize.c | 29 +++++++++++++++++++++++++++++
11
tcg/tcg-op-ldst.c | 38 ++++++++++++++++++++------------------
10
1 file changed, 29 insertions(+)
12
1 file changed, 20 insertions(+), 18 deletions(-)
11
13
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
13
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
16
--- a/tcg/tcg-op-ldst.c
15
+++ b/tcg/optimize.c
17
+++ b/tcg/tcg-op-ldst.c
16
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
18
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)
17
z2 = arg_info(op->args[2])->z_mask;
18
ctx->z_mask = z1 & z2;
19
20
+ /*
21
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
22
+ * Bitwise operations preserve the relative quantity of the repetitions.
23
+ */
24
+ ctx->s_mask = arg_info(op->args[1])->s_mask
25
+ & arg_info(op->args[2])->s_mask;
26
+
27
/*
28
* Known-zeros does not imply known-ones. Therefore unless
29
* arg2 is constant, we can't infer affected bits from it.
30
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
31
}
19
}
32
ctx->z_mask = z1;
33
34
+ ctx->s_mask = arg_info(op->args[1])->s_mask
35
+ & arg_info(op->args[2])->s_mask;
36
return fold_masks(ctx, op);
37
}
20
}
38
21
39
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
22
-static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
40
fold_xi_to_not(ctx, op, 0)) {
23
+/* Only required for loads, where value might overlap addr. */
41
return true;
24
+static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
25
{
26
#ifdef CONFIG_PLUGIN
27
if (tcg_ctx->plugin_insn != NULL) {
28
@@ -XXX,XX +XXX,XX @@ static inline TCGv plugin_prep_mem_callbacks(TCGv vaddr)
29
return temp;
42
}
30
}
43
+
31
#endif
44
+ ctx->s_mask = arg_info(op->args[1])->s_mask
32
- return vaddr;
45
+ & arg_info(op->args[2])->s_mask;
33
+ return NULL;
46
return false;
47
}
34
}
48
35
49
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
36
-static void plugin_gen_mem_callbacks(TCGv vaddr, MemOpIdx oi,
50
37
- enum qemu_plugin_mem_rw rw)
51
ctx->z_mask = arg_info(op->args[3])->z_mask
38
+static void
52
| arg_info(op->args[4])->z_mask;
39
+plugin_gen_mem_callbacks(TCGv copy_addr, TCGv orig_addr, MemOpIdx oi,
53
+ ctx->s_mask = arg_info(op->args[3])->s_mask
40
+ enum qemu_plugin_mem_rw rw)
54
+ & arg_info(op->args[4])->s_mask;
41
{
55
42
#ifdef CONFIG_PLUGIN
56
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
43
if (tcg_ctx->plugin_insn != NULL) {
57
uint64_t tv = arg_info(op->args[3])->val;
44
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
58
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
45
- plugin_gen_empty_mem_callback(vaddr, info);
59
fold_xi_to_not(ctx, op, -1)) {
46
- tcg_temp_free(vaddr);
60
return true;
47
+ plugin_gen_empty_mem_callback(copy_addr ? : orig_addr, info);
48
+ if (copy_addr) {
49
+ tcg_temp_free(copy_addr);
50
+ }
61
}
51
}
62
+
52
#endif
63
+ ctx->s_mask = arg_info(op->args[1])->s_mask
64
+ & arg_info(op->args[2])->s_mask;
65
return false;
66
}
53
}
67
54
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
68
@@ -XXX,XX +XXX,XX @@ static bool fold_nor(OptContext *ctx, TCGOp *op)
55
{
69
fold_xi_to_not(ctx, op, 0)) {
56
MemOp orig_memop;
70
return true;
57
MemOpIdx oi;
58
+ TCGv copy_addr;
59
60
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
61
memop = tcg_canonicalize_memop(memop, 0, 0);
62
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
63
}
71
}
64
}
72
+
65
73
+ ctx->s_mask = arg_info(op->args[1])->s_mask
66
- addr = plugin_prep_mem_callbacks(addr);
74
+ & arg_info(op->args[2])->s_mask;
67
+ copy_addr = plugin_maybe_preserve_addr(addr);
75
return false;
68
gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
69
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
70
+ plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
71
72
if ((orig_memop ^ memop) & MO_BSWAP) {
73
switch (orig_memop & MO_SIZE) {
74
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
75
memop &= ~MO_BSWAP;
76
}
77
78
- addr = plugin_prep_mem_callbacks(addr);
79
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
80
gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
81
} else {
82
gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
83
}
84
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
85
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
86
87
if (swap) {
88
tcg_temp_free_i32(swap);
89
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
90
{
91
MemOp orig_memop;
92
MemOpIdx oi;
93
+ TCGv copy_addr;
94
95
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
96
tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
97
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
98
}
99
}
100
101
- addr = plugin_prep_mem_callbacks(addr);
102
+ copy_addr = plugin_maybe_preserve_addr(addr);
103
gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
104
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
105
+ plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
106
107
if ((orig_memop ^ memop) & MO_BSWAP) {
108
int flags = (orig_memop & MO_SIGN
109
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
110
memop &= ~MO_BSWAP;
111
}
112
113
- addr = plugin_prep_mem_callbacks(addr);
114
gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
115
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
116
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
117
118
if (swap) {
119
tcg_temp_free_i64(swap);
120
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
121
tcg_debug_assert((memop & MO_SIGN) == 0);
122
123
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
124
- addr = plugin_prep_mem_callbacks(addr);
125
126
/* TODO: For now, force 32-bit hosts to use the helper. */
127
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
128
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
129
maybe_free_addr64(a64);
130
}
131
132
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_R);
133
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_R);
76
}
134
}
77
135
78
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
136
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
79
return true;
137
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
138
tcg_debug_assert((memop & MO_SIGN) == 0);
139
140
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
141
- addr = plugin_prep_mem_callbacks(addr);
142
143
/* TODO: For now, force 32-bit hosts to use the helper. */
144
145
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
146
maybe_free_addr64(a64);
80
}
147
}
81
148
82
+ ctx->s_mask = arg_info(op->args[1])->s_mask;
149
- plugin_gen_mem_callbacks(addr, oi, QEMU_PLUGIN_MEM_W);
83
+
150
+ plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
84
/* Because of fold_to_not, we want to always return true, via finish. */
85
finish_folding(ctx, op);
86
return true;
87
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
88
89
ctx->z_mask = arg_info(op->args[1])->z_mask
90
| arg_info(op->args[2])->z_mask;
91
+ ctx->s_mask = arg_info(op->args[1])->s_mask
92
+ & arg_info(op->args[2])->s_mask;
93
return fold_masks(ctx, op);
94
}
151
}
95
152
96
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
153
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
97
fold_ix_to_not(ctx, op, 0)) {
98
return true;
99
}
100
+
101
+ ctx->s_mask = arg_info(op->args[1])->s_mask
102
+ & arg_info(op->args[2])->s_mask;
103
return false;
104
}
105
106
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
107
108
ctx->z_mask = arg_info(op->args[1])->z_mask
109
| arg_info(op->args[2])->z_mask;
110
+ ctx->s_mask = arg_info(op->args[1])->s_mask
111
+ & arg_info(op->args[2])->s_mask;
112
return fold_masks(ctx, op);
113
}
114
115
--
154
--
116
2.25.1
155
2.34.1
117
156
118
157
1
Provide what will become a larger context for splitting
1
Since gen_empty_mem_cb already widens the address to 64 bits, let's
2
the very large tcg_optimize function.
2
do the widening earlier, during tcg expansion.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
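Pieced together from the hunks below, plugin_gen_mem_callbacks now performs the widening itself before handing the address to the plugin hook (a sketch; in the 64-bit-guest case TCGv and TCGv_i64 are the same type, so orig_addr can be passed directly):

    static void
    plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGv orig_addr, MemOpIdx oi,
                             enum qemu_plugin_mem_rw rw)
    {
    #ifdef CONFIG_PLUGIN
        if (tcg_ctx->plugin_insn != NULL) {
            qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);

    #if TARGET_LONG_BITS == 64
            if (copy_addr) {
                plugin_gen_empty_mem_callback(copy_addr, info);
                tcg_temp_free_i64(copy_addr);
            } else {
                plugin_gen_empty_mem_callback(orig_addr, info);
            }
    #else
            if (!copy_addr) {
                copy_addr = tcg_temp_ebb_new_i64();
                tcg_gen_extu_tl_i64(copy_addr, orig_addr);
            }
            plugin_gen_empty_mem_callback(copy_addr, info);
            tcg_temp_free_i64(copy_addr);
    #endif
        }
    #endif
    }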
8
---
6
---
9
tcg/optimize.c | 77 ++++++++++++++++++++++++++------------------------
7
include/exec/plugin-gen.h | 4 ++--
10
1 file changed, 40 insertions(+), 37 deletions(-)
8
accel/tcg/plugin-gen.c | 9 +++------
9
tcg/tcg-op-ldst.c | 28 ++++++++++++++++++++--------
10
3 files changed, 25 insertions(+), 16 deletions(-)
11
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
14
--- a/include/exec/plugin-gen.h
15
+++ b/tcg/optimize.c
15
+++ b/include/exec/plugin-gen.h
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
16
@@ -XXX,XX +XXX,XX @@ void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
17
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
17
void plugin_gen_insn_end(void);
18
} TempOptInfo;
18
19
19
void plugin_gen_disable_mem_helpers(void);
20
+typedef struct OptContext {
20
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info);
21
+ TCGTempSet temps_used;
21
+void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info);
22
+} OptContext;
22
23
+
23
static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
25
{
24
{
26
return ts->state_ptr;
25
@@ -XXX,XX +XXX,XX @@ static inline void plugin_gen_tb_end(CPUState *cpu)
27
@@ -XXX,XX +XXX,XX @@ static void reset_temp(TCGArg arg)
26
static inline void plugin_gen_disable_mem_helpers(void)
27
{ }
28
29
-static inline void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
30
+static inline void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info)
31
{ }
32
33
static inline void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
34
diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/accel/tcg/plugin-gen.c
37
+++ b/accel/tcg/plugin-gen.c
38
@@ -XXX,XX +XXX,XX @@ static void gen_empty_inline_cb(void)
39
tcg_temp_free_i64(val);
28
}
40
}
29
41
30
/* Initialize and activate a temporary. */
42
-static void gen_empty_mem_cb(TCGv addr, uint32_t info)
31
-static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
43
+static void gen_empty_mem_cb(TCGv_i64 addr, uint32_t info)
32
+static void init_ts_info(OptContext *ctx, TCGTemp *ts)
33
{
44
{
34
size_t idx = temp_idx(ts);
45
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
35
TempOptInfo *ti;
46
TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
36
47
- TCGv_i64 addr64 = tcg_temp_ebb_new_i64();
37
- if (test_bit(idx, temps_used->l)) {
48
TCGv_ptr udata = tcg_temp_ebb_new_ptr();
38
+ if (test_bit(idx, ctx->temps_used.l)) {
49
39
return;
50
tcg_gen_movi_i32(meminfo, info);
40
}
51
tcg_gen_movi_ptr(udata, 0);
41
- set_bit(idx, temps_used->l);
52
tcg_gen_ld_i32(cpu_index, cpu_env,
42
+ set_bit(idx, ctx->temps_used.l);
53
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
43
54
- tcg_gen_extu_tl_i64(addr64, addr);
44
ti = ts->state_ptr;
55
45
if (ti == NULL) {
56
- gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr64, udata);
46
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
57
+ gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr, udata);
58
59
tcg_temp_free_ptr(udata);
60
- tcg_temp_free_i64(addr64);
61
tcg_temp_free_i32(meminfo);
62
tcg_temp_free_i32(cpu_index);
63
}
64
@@ -XXX,XX +XXX,XX @@ static void plugin_gen_empty_callback(enum plugin_gen_from from)
47
}
65
}
48
}
66
}
49
67
50
-static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
68
-void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
51
+static void init_arg_info(OptContext *ctx, TCGArg arg)
69
+void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info)
52
{
70
{
53
- init_ts_info(temps_used, arg_temp(arg));
71
enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
54
+ init_ts_info(ctx, arg_temp(arg));
72
73
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/tcg/tcg-op-ldst.c
76
+++ b/tcg/tcg-op-ldst.c
77
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)
55
}
78
}
56
79
57
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
80
/* Only required for loads, where value might overlap addr. */
58
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
81
-static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
82
+static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
83
{
84
#ifdef CONFIG_PLUGIN
85
if (tcg_ctx->plugin_insn != NULL) {
86
/* Save a copy of the vaddr for use after a load. */
87
- TCGv temp = tcg_temp_new();
88
- tcg_gen_mov_tl(temp, vaddr);
89
+ TCGv_i64 temp = tcg_temp_ebb_new_i64();
90
+ tcg_gen_extu_tl_i64(temp, vaddr);
91
return temp;
59
}
92
}
93
#endif
94
@@ -XXX,XX +XXX,XX @@ static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
60
}
95
}
61
96
62
-static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
97
static void
63
+static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
98
-plugin_gen_mem_callbacks(TCGv copy_addr, TCGv orig_addr, MemOpIdx oi,
64
TCGOp *op, TCGArg dst, uint64_t val)
99
+plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGv orig_addr, MemOpIdx oi,
100
enum qemu_plugin_mem_rw rw)
65
{
101
{
66
const TCGOpDef *def = &tcg_op_defs[op->opc];
102
#ifdef CONFIG_PLUGIN
67
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
103
if (tcg_ctx->plugin_insn != NULL) {
68
104
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
69
/* Convert movi to mov with constant temp. */
105
- plugin_gen_empty_mem_callback(copy_addr ? : orig_addr, info);
70
tv = tcg_constant_internal(type, val);
106
+
71
- init_ts_info(temps_used, tv);
107
+#if TARGET_LONG_BITS == 64
72
+ init_ts_info(ctx, tv);
108
if (copy_addr) {
73
tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
109
- tcg_temp_free(copy_addr);
110
+ plugin_gen_empty_mem_callback(copy_addr, info);
111
+ tcg_temp_free_i64(copy_addr);
112
+ } else {
113
+ plugin_gen_empty_mem_callback(orig_addr, info);
114
}
115
+#else
116
+ if (!copy_addr) {
117
+ copy_addr = tcg_temp_ebb_new_i64();
118
+ tcg_gen_extu_tl_i64(copy_addr, orig_addr);
119
+ }
120
+ plugin_gen_empty_mem_callback(copy_addr, info);
121
+ tcg_temp_free_i64(copy_addr);
122
+#endif
123
}
124
#endif
74
}
125
}
75
126
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
77
{
127
{
78
int nb_temps, nb_globals, i;
128
MemOp orig_memop;
79
TCGOp *op, *op_next, *prev_mb = NULL;
129
MemOpIdx oi;
80
- TCGTempSet temps_used;
130
- TCGv copy_addr;
81
+ OptContext ctx = {};
131
+ TCGv_i64 copy_addr;
82
132
83
/* Array VALS has an element for each temp.
133
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
84
If this temp holds a constant then its value is kept in VALS' element.
134
memop = tcg_canonicalize_memop(memop, 0, 0);
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
135
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
86
nb_temps = s->nb_temps;
136
{
87
nb_globals = s->nb_globals;
137
MemOp orig_memop;
88
138
MemOpIdx oi;
89
- memset(&temps_used, 0, sizeof(temps_used));
139
- TCGv copy_addr;
90
for (i = 0; i < nb_temps; ++i) {
140
+ TCGv_i64 copy_addr;
91
s->temps[i].state_ptr = NULL;
141
92
}
142
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
93
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
143
tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
94
for (i = 0; i < nb_oargs + nb_iargs; i++) {
95
TCGTemp *ts = arg_temp(op->args[i]);
96
if (ts) {
97
- init_ts_info(&temps_used, ts);
98
+ init_ts_info(&ctx, ts);
99
}
100
}
101
} else {
102
nb_oargs = def->nb_oargs;
103
nb_iargs = def->nb_iargs;
104
for (i = 0; i < nb_oargs + nb_iargs; i++) {
105
- init_arg_info(&temps_used, op->args[i]);
106
+ init_arg_info(&ctx, op->args[i]);
107
}
108
}
109
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
CASE_OP_32_64(rotr):
112
if (arg_is_const(op->args[1])
113
&& arg_info(op->args[1])->val == 0) {
114
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
115
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
116
continue;
117
}
118
break;
119
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
120
121
if (partmask == 0) {
122
tcg_debug_assert(nb_oargs == 1);
123
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
124
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
125
continue;
126
}
127
if (affected == 0) {
128
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
129
CASE_OP_32_64(mulsh):
130
if (arg_is_const(op->args[2])
131
&& arg_info(op->args[2])->val == 0) {
132
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
133
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
134
continue;
135
}
136
break;
137
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
138
CASE_OP_32_64_VEC(sub):
139
CASE_OP_32_64_VEC(xor):
140
if (args_are_copies(op->args[1], op->args[2])) {
141
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
142
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
143
continue;
144
}
145
break;
146
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
147
if (arg_is_const(op->args[1])) {
148
tmp = arg_info(op->args[1])->val;
149
tmp = dup_const(TCGOP_VECE(op), tmp);
150
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
151
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
152
break;
153
}
154
goto do_default;
155
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
156
case INDEX_op_dup2_vec:
157
assert(TCG_TARGET_REG_BITS == 32);
158
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
159
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
160
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0],
161
deposit64(arg_info(op->args[1])->val, 32, 32,
162
arg_info(op->args[2])->val));
163
break;
164
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
165
case INDEX_op_extrh_i64_i32:
166
if (arg_is_const(op->args[1])) {
167
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
168
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
169
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
break;
171
}
172
goto do_default;
173
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
174
if (arg_is_const(op->args[1])) {
175
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
176
op->args[2]);
177
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
178
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
break;
180
}
181
goto do_default;
182
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
183
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
184
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
185
arg_info(op->args[2])->val);
186
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
187
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
break;
189
}
190
goto do_default;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
TCGArg v = arg_info(op->args[1])->val;
193
if (v != 0) {
194
tmp = do_constant_folding(opc, v, 0);
195
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
196
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
} else {
198
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
199
}
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
tmp = deposit64(arg_info(op->args[1])->val,
202
op->args[3], op->args[4],
203
arg_info(op->args[2])->val);
204
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
205
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
206
break;
207
}
208
goto do_default;
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
if (arg_is_const(op->args[1])) {
211
tmp = extract64(arg_info(op->args[1])->val,
212
op->args[2], op->args[3]);
213
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
214
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
215
break;
216
}
217
goto do_default;
218
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
219
if (arg_is_const(op->args[1])) {
220
tmp = sextract64(arg_info(op->args[1])->val,
221
op->args[2], op->args[3]);
222
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
223
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
224
break;
225
}
226
goto do_default;
227
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
228
tmp = (int32_t)(((uint32_t)v1 >> shr) |
229
((uint32_t)v2 << (32 - shr)));
230
}
231
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
232
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
233
break;
234
}
235
goto do_default;
236
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
237
tmp = do_constant_folding_cond(opc, op->args[1],
238
op->args[2], op->args[3]);
239
if (tmp != 2) {
240
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
241
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
242
break;
243
}
244
goto do_default;
245
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
246
op->args[1], op->args[2]);
247
if (tmp != 2) {
248
if (tmp) {
249
- memset(&temps_used, 0, sizeof(temps_used));
250
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
251
op->opc = INDEX_op_br;
252
op->args[0] = op->args[3];
253
} else {
254
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
255
256
rl = op->args[0];
257
rh = op->args[1];
258
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
259
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
260
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
261
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
262
break;
263
}
264
goto do_default;
265
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
266
267
rl = op->args[0];
268
rh = op->args[1];
269
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
270
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
271
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
272
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
273
break;
274
}
275
goto do_default;
276
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
277
if (tmp != 2) {
278
if (tmp) {
279
do_brcond_true:
280
- memset(&temps_used, 0, sizeof(temps_used));
281
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
282
op->opc = INDEX_op_br;
283
op->args[0] = op->args[5];
284
} else {
285
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
286
/* Simplify LT/GE comparisons vs zero to a single compare
287
vs the high word of the input. */
288
do_brcond_high:
289
- memset(&temps_used, 0, sizeof(temps_used));
290
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
291
op->opc = INDEX_op_brcond_i32;
292
op->args[0] = op->args[1];
293
op->args[1] = op->args[3];
294
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
295
goto do_default;
296
}
297
do_brcond_low:
298
- memset(&temps_used, 0, sizeof(temps_used));
299
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
300
op->opc = INDEX_op_brcond_i32;
301
op->args[1] = op->args[2];
302
op->args[2] = op->args[4];
303
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
304
op->args[5]);
305
if (tmp != 2) {
306
do_setcond_const:
307
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
308
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
309
} else if ((op->args[5] == TCG_COND_LT
310
|| op->args[5] == TCG_COND_GE)
311
&& arg_is_const(op->args[3])
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
if (!(tcg_call_flags(op)
314
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
315
for (i = 0; i < nb_globals; i++) {
316
- if (test_bit(i, temps_used.l)) {
317
+ if (test_bit(i, ctx.temps_used.l)) {
318
reset_ts(&s->temps[i]);
319
}
320
}
321
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
322
block, otherwise we only trash the output args. "z_mask" is
323
the non-zero bits mask for the first output arg. */
324
if (def->flags & TCG_OPF_BB_END) {
325
- memset(&temps_used, 0, sizeof(temps_used));
326
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
327
} else {
328
do_reset_output:
329
for (i = 0; i < nb_oargs; i++) {
330
--
144
--
331
2.25.1
145
2.34.1
332
146
333
147
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
This will enable replacement of TARGET_LONG_BITS within tcg/.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
---
5
tcg/optimize.c | 33 +++++++++++++++++++--------------
6
include/tcg/tcg.h | 1 +
6
1 file changed, 19 insertions(+), 14 deletions(-)
7
accel/tcg/translate-all.c | 2 ++
8
tcg/tcg.c | 3 +++
9
3 files changed, 6 insertions(+)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/include/tcg/tcg.h
11
+++ b/tcg/optimize.c
14
+++ b/include/tcg/tcg.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
13
return fold_const2(ctx, op);
16
int nb_temps;
17
int nb_indirects;
18
int nb_ops;
19
+ TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */
20
21
TCGRegSet reserved_regs;
22
intptr_t current_frame_offset;
23
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
24
index XXXXXXX..XXXXXXX 100644
25
--- a/accel/tcg/translate-all.c
26
+++ b/accel/tcg/translate-all.c
27
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
28
tb_set_page_addr0(tb, phys_pc);
29
tb_set_page_addr1(tb, -1);
30
tcg_ctx->gen_tb = tb;
31
+ tcg_ctx->addr_type = TCG_TYPE_TL;
32
+
33
tb_overflow:
34
35
#ifdef CONFIG_PROFILER
36
diff --git a/tcg/tcg.c b/tcg/tcg.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/tcg/tcg.c
39
+++ b/tcg/tcg.c
40
@@ -XXX,XX +XXX,XX @@ void tcg_func_start(TCGContext *s)
41
QTAILQ_INIT(&s->ops);
42
QTAILQ_INIT(&s->free_ops);
43
QSIMPLEQ_INIT(&s->labels);
44
+
45
+ tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
46
+ s->addr_type == TCG_TYPE_I64);
14
}
47
}
15
48
16
+static bool fold_brcond(OptContext *ctx, TCGOp *op)
49
static TCGTemp *tcg_temp_alloc(TCGContext *s)
17
+{
18
+ TCGCond cond = op->args[2];
19
+ int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
20
+
21
+ if (i == 0) {
22
+ tcg_op_remove(ctx->tcg, op);
23
+ return true;
24
+ }
25
+ if (i > 0) {
26
+ op->opc = INDEX_op_br;
27
+ op->args[0] = op->args[3];
28
+ }
29
+ return false;
30
+}
31
+
32
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
33
{
34
TCGCond cond = op->args[4];
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
36
}
37
break;
38
39
- CASE_OP_32_64(brcond):
40
- i = do_constant_folding_cond(opc, op->args[0],
41
- op->args[1], op->args[2]);
42
- if (i == 0) {
43
- tcg_op_remove(s, op);
44
- continue;
45
- } else if (i > 0) {
46
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
47
- op->opc = opc = INDEX_op_br;
48
- op->args[0] = op->args[3];
49
- break;
50
- }
51
- break;
52
-
53
CASE_OP_32_64(movcond):
54
i = do_constant_folding_cond(opc, op->args[1],
55
op->args[2], op->args[5]);
56
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
57
CASE_OP_32_64_VEC(andc):
58
done = fold_andc(&ctx, op);
59
break;
60
+ CASE_OP_32_64(brcond):
61
+ done = fold_brcond(&ctx, op);
62
+ break;
63
case INDEX_op_brcond2_i32:
64
done = fold_brcond2(&ctx, op);
65
break;
66
--
50
--
67
2.25.1
51
2.34.1
68
52
69
53
diff view generated by jsdifflib
1
Pull the "op r, a, a => mov r, a" optimization into a function,
1
Expand from TCGv to TCGTemp inline in the translators,
2
and use it in the outer opcode fold functions.
2
and validate that the size matches tcg_ctx->addr_type.
3
These inlines will eventually be seen only by target-specific code.
3
4
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 39 ++++++++++++++++++++++++---------------
8
include/tcg/tcg-op.h | 50 ++++++-
9
1 file changed, 24 insertions(+), 15 deletions(-)
9
tcg/tcg-op-ldst.c | 343 ++++++++++++++++++++++++++-----------------
10
2 files changed, 251 insertions(+), 142 deletions(-)
10
11
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
--- a/include/tcg/tcg-op.h
14
+++ b/tcg/optimize.c
15
+++ b/include/tcg/tcg-op.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
16
@@ -XXX,XX +XXX,XX @@ static inline void tcg_gen_plugin_cb_end(void)
16
return false;
17
#define tcg_temp_new() tcg_temp_new_i32()
17
}
18
#define tcg_global_mem_new tcg_global_mem_new_i32
18
19
#define tcg_temp_free tcg_temp_free_i32
19
+/* If the binary operation has both arguments equal, fold to identity. */
20
+#define tcgv_tl_temp tcgv_i32_temp
20
+static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
21
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i32
21
+{
22
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i32
22
+ if (args_are_copies(op->args[1], op->args[2])) {
23
#else
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
24
#define tcg_temp_new() tcg_temp_new_i64()
24
+ }
25
#define tcg_global_mem_new tcg_global_mem_new_i64
25
+ return false;
26
#define tcg_temp_free tcg_temp_free_i64
27
+#define tcgv_tl_temp tcgv_i64_temp
28
#define tcg_gen_qemu_ld_tl tcg_gen_qemu_ld_i64
29
#define tcg_gen_qemu_st_tl tcg_gen_qemu_st_i64
30
#endif
31
32
-void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
33
-void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
34
-void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
35
-void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
36
-void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
37
-void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
38
+void tcg_gen_qemu_ld_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
39
+void tcg_gen_qemu_st_i32_chk(TCGv_i32, TCGTemp *, TCGArg, MemOp, TCGType);
40
+void tcg_gen_qemu_ld_i64_chk(TCGv_i64, TCGTemp *, TCGArg, MemOp, TCGType);
41
+void tcg_gen_qemu_st_i64_chk(TCGv_i64, TCGTemp *, TCGArg, MemOp, TCGType);
42
+void tcg_gen_qemu_ld_i128_chk(TCGv_i128, TCGTemp *, TCGArg, MemOp, TCGType);
43
+void tcg_gen_qemu_st_i128_chk(TCGv_i128, TCGTemp *, TCGArg, MemOp, TCGType);
44
+
45
+static inline void
46
+tcg_gen_qemu_ld_i32(TCGv_i32 v, TCGv a, TCGArg i, MemOp m)
47
+{
48
+ tcg_gen_qemu_ld_i32_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
49
+}
50
+
51
+static inline void
52
+tcg_gen_qemu_st_i32(TCGv_i32 v, TCGv a, TCGArg i, MemOp m)
53
+{
54
+ tcg_gen_qemu_st_i32_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
55
+}
56
+
57
+static inline void
58
+tcg_gen_qemu_ld_i64(TCGv_i64 v, TCGv a, TCGArg i, MemOp m)
59
+{
60
+ tcg_gen_qemu_ld_i64_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
61
+}
62
+
63
+static inline void
64
+tcg_gen_qemu_st_i64(TCGv_i64 v, TCGv a, TCGArg i, MemOp m)
65
+{
66
+ tcg_gen_qemu_st_i64_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
67
+}
68
+
69
+static inline void
70
+tcg_gen_qemu_ld_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
71
+{
72
+ tcg_gen_qemu_ld_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
73
+}
74
+
75
+static inline void
76
+tcg_gen_qemu_st_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
77
+{
78
+ tcg_gen_qemu_st_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
79
+}
80
81
void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
82
TCGArg, MemOp);
83
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/tcg/tcg-op-ldst.c
86
+++ b/tcg/tcg-op-ldst.c
87
@@ -XXX,XX +XXX,XX @@ static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
88
return op;
89
}
90
91
-static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
92
- MemOp memop, TCGArg idx)
93
+static void gen_ldst(TCGOpcode opc, TCGTemp *vl, TCGTemp *vh,
94
+ TCGTemp *addr, MemOpIdx oi)
95
{
96
- MemOpIdx oi = make_memop_idx(memop, idx);
97
-#if TARGET_LONG_BITS == 32
98
- tcg_gen_op3i_i32(opc, val, addr, oi);
99
-#else
100
- if (TCG_TARGET_REG_BITS == 32) {
101
- tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
102
+ if (TCG_TARGET_REG_BITS == 64 || tcg_ctx->addr_type == TCG_TYPE_I32) {
103
+ if (vh) {
104
+ tcg_gen_op4(opc, temp_arg(vl), temp_arg(vh), temp_arg(addr), oi);
105
+ } else {
106
+ tcg_gen_op3(opc, temp_arg(vl), temp_arg(addr), oi);
107
+ }
108
} else {
109
- tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
110
+ /* See TCGV_LOW/HIGH. */
111
+ TCGTemp *al = addr + HOST_BIG_ENDIAN;
112
+ TCGTemp *ah = addr + !HOST_BIG_ENDIAN;
113
+
114
+ if (vh) {
115
+ tcg_gen_op5(opc, temp_arg(vl), temp_arg(vh),
116
+ temp_arg(al), temp_arg(ah), oi);
117
+ } else {
118
+ tcg_gen_op4(opc, temp_arg(vl), temp_arg(al), temp_arg(ah), oi);
119
+ }
120
}
121
-#endif
122
}
123
124
-static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
125
- MemOp memop, TCGArg idx)
126
+static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 v, TCGTemp *addr, MemOpIdx oi)
127
{
128
- MemOpIdx oi = make_memop_idx(memop, idx);
129
-#if TARGET_LONG_BITS == 32
130
if (TCG_TARGET_REG_BITS == 32) {
131
- tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
132
+ TCGTemp *vl = tcgv_i32_temp(TCGV_LOW(v));
133
+ TCGTemp *vh = tcgv_i32_temp(TCGV_HIGH(v));
134
+ gen_ldst(opc, vl, vh, addr, oi);
135
} else {
136
- tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
137
+ gen_ldst(opc, tcgv_i64_temp(v), NULL, addr, oi);
138
}
139
-#else
140
- if (TCG_TARGET_REG_BITS == 32) {
141
- tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
142
- TCGV_LOW(addr), TCGV_HIGH(addr), oi);
143
- } else {
144
- tcg_gen_op3i_i64(opc, val, addr, oi);
145
- }
146
-#endif
147
}
148
149
static void tcg_gen_req_mo(TCGBar type)
150
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_req_mo(TCGBar type)
151
}
152
153
/* Only required for loads, where value might overlap addr. */
154
-static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
155
+static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
156
{
157
#ifdef CONFIG_PLUGIN
158
if (tcg_ctx->plugin_insn != NULL) {
159
/* Save a copy of the vaddr for use after a load. */
160
TCGv_i64 temp = tcg_temp_ebb_new_i64();
161
- tcg_gen_extu_tl_i64(temp, vaddr);
162
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
163
+ tcg_gen_extu_i32_i64(temp, temp_tcgv_i32(addr));
164
+ } else {
165
+ tcg_gen_mov_i64(temp, temp_tcgv_i64(addr));
166
+ }
167
return temp;
168
}
169
#endif
170
@@ -XXX,XX +XXX,XX @@ static TCGv_i64 plugin_maybe_preserve_addr(TCGv vaddr)
171
}
172
173
static void
174
-plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGv orig_addr, MemOpIdx oi,
175
+plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
176
enum qemu_plugin_mem_rw rw)
177
{
178
#ifdef CONFIG_PLUGIN
179
if (tcg_ctx->plugin_insn != NULL) {
180
qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
181
182
-#if TARGET_LONG_BITS == 64
183
- if (copy_addr) {
184
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
185
+ if (!copy_addr) {
186
+ copy_addr = tcg_temp_ebb_new_i64();
187
+ tcg_gen_extu_i32_i64(copy_addr, temp_tcgv_i32(orig_addr));
188
+ }
189
plugin_gen_empty_mem_callback(copy_addr, info);
190
tcg_temp_free_i64(copy_addr);
191
} else {
192
- plugin_gen_empty_mem_callback(orig_addr, info);
193
+ if (copy_addr) {
194
+ plugin_gen_empty_mem_callback(copy_addr, info);
195
+ tcg_temp_free_i64(copy_addr);
196
+ } else {
197
+ plugin_gen_empty_mem_callback(temp_tcgv_i64(orig_addr), info);
198
+ }
199
}
200
-#else
201
- if (!copy_addr) {
202
- copy_addr = tcg_temp_ebb_new_i64();
203
- tcg_gen_extu_tl_i64(copy_addr, orig_addr);
204
- }
205
- plugin_gen_empty_mem_callback(copy_addr, info);
206
- tcg_temp_free_i64(copy_addr);
207
-#endif
208
}
209
#endif
210
}
211
212
-void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
213
+static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
214
+ TCGArg idx, MemOp memop)
215
{
216
MemOp orig_memop;
217
- MemOpIdx oi;
218
+ MemOpIdx orig_oi, oi;
219
TCGv_i64 copy_addr;
220
221
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
222
- memop = tcg_canonicalize_memop(memop, 0, 0);
223
- oi = make_memop_idx(memop, idx);
224
+ orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
225
+ orig_oi = oi = make_memop_idx(memop, idx);
226
227
- orig_memop = memop;
228
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
229
memop &= ~MO_BSWAP;
230
/* The bswap primitive benefits from zero-extended input. */
231
if ((memop & MO_SSIZE) == MO_SW) {
232
memop &= ~MO_SIGN;
233
}
234
+ oi = make_memop_idx(memop, idx);
235
}
236
237
copy_addr = plugin_maybe_preserve_addr(addr);
238
- gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
239
- plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
240
+ gen_ldst(INDEX_op_qemu_ld_i32, tcgv_i32_temp(val), NULL, addr, oi);
241
+ plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
242
243
if ((orig_memop ^ memop) & MO_BSWAP) {
244
switch (orig_memop & MO_SIZE) {
245
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
246
}
247
}
248
249
-void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
250
+void tcg_gen_qemu_ld_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
251
+ MemOp memop, TCGType addr_type)
252
+{
253
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
254
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
255
+ tcg_gen_qemu_ld_i32_int(val, addr, idx, memop);
256
+}
257
+
258
+static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
259
+ TCGArg idx, MemOp memop)
260
{
261
TCGv_i32 swap = NULL;
262
- MemOpIdx oi;
263
+ MemOpIdx orig_oi, oi;
264
+ TCGOpcode opc;
265
266
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
267
memop = tcg_canonicalize_memop(memop, 0, 1);
268
- oi = make_memop_idx(memop, idx);
269
+ orig_oi = oi = make_memop_idx(memop, idx);
270
271
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
272
swap = tcg_temp_ebb_new_i32();
273
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
274
}
275
val = swap;
276
memop &= ~MO_BSWAP;
277
+ oi = make_memop_idx(memop, idx);
278
}
279
280
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
281
- gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
282
+ opc = INDEX_op_qemu_st8_i32;
283
} else {
284
- gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
285
+ opc = INDEX_op_qemu_st_i32;
286
}
287
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
288
+ gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
289
+ plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
290
291
if (swap) {
292
tcg_temp_free_i32(swap);
293
}
294
}
295
296
-void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
297
+void tcg_gen_qemu_st_i32_chk(TCGv_i32 val, TCGTemp *addr, TCGArg idx,
298
+ MemOp memop, TCGType addr_type)
299
+{
300
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
301
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
302
+ tcg_gen_qemu_st_i32_int(val, addr, idx, memop);
303
+}
304
+
305
+static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
306
+ TCGArg idx, MemOp memop)
307
{
308
MemOp orig_memop;
309
- MemOpIdx oi;
310
+ MemOpIdx orig_oi, oi;
311
TCGv_i64 copy_addr;
312
313
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
314
- tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
315
+ tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
316
if (memop & MO_SIGN) {
317
tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
318
} else {
319
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
320
}
321
322
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
323
- memop = tcg_canonicalize_memop(memop, 1, 0);
324
- oi = make_memop_idx(memop, idx);
325
+ orig_memop = memop = tcg_canonicalize_memop(memop, 1, 0);
326
+ orig_oi = oi = make_memop_idx(memop, idx);
327
328
- orig_memop = memop;
329
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
330
memop &= ~MO_BSWAP;
331
/* The bswap primitive benefits from zero-extended input. */
332
if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
333
memop &= ~MO_SIGN;
334
}
335
+ oi = make_memop_idx(memop, idx);
336
}
337
338
copy_addr = plugin_maybe_preserve_addr(addr);
339
- gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
340
- plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
341
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, oi);
342
+ plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
343
344
if ((orig_memop ^ memop) & MO_BSWAP) {
345
int flags = (orig_memop & MO_SIGN
346
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
347
}
348
}
349
350
-void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
351
+void tcg_gen_qemu_ld_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
352
+ MemOp memop, TCGType addr_type)
353
+{
354
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
355
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
356
+ tcg_gen_qemu_ld_i64_int(val, addr, idx, memop);
357
+}
358
+
359
+static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
360
+ TCGArg idx, MemOp memop)
361
{
362
TCGv_i64 swap = NULL;
363
- MemOpIdx oi;
364
+ MemOpIdx orig_oi, oi;
365
366
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
367
- tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
368
+ tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
369
return;
370
}
371
372
tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
373
memop = tcg_canonicalize_memop(memop, 1, 1);
374
- oi = make_memop_idx(memop, idx);
375
+ orig_oi = oi = make_memop_idx(memop, idx);
376
377
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
378
swap = tcg_temp_ebb_new_i64();
379
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
380
}
381
val = swap;
382
memop &= ~MO_BSWAP;
383
+ oi = make_memop_idx(memop, idx);
384
}
385
386
- gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
387
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
388
+ gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, oi);
389
+ plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
390
391
if (swap) {
392
tcg_temp_free_i64(swap);
393
}
394
}
395
396
+void tcg_gen_qemu_st_i64_chk(TCGv_i64 val, TCGTemp *addr, TCGArg idx,
397
+ MemOp memop, TCGType addr_type)
398
+{
399
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
400
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
401
+ tcg_gen_qemu_st_i64_int(val, addr, idx, memop);
26
+}
402
+}
27
+
403
+
28
/*
404
/*
29
* These outermost fold_<op> functions are sorted alphabetically.
405
* Return true if @mop, without knowledge of the pointer alignment,
30
+ *
406
* does not require 16-byte atomicity, and it would be advantageous
31
+ * The ordering of the transformations should be:
407
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
32
+ * 1) those that produce a constant
408
{
33
+ * 2) those that produce a copy
409
MemOp mop_1 = orig, mop_2;
34
+ * 3) those that produce information about the result value.
410
35
*/
411
- tcg_debug_assert((orig & MO_SIZE) == MO_128);
36
412
- tcg_debug_assert((orig & MO_SIGN) == 0);
37
static bool fold_add(OptContext *ctx, TCGOp *op)
38
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
39
40
static bool fold_and(OptContext *ctx, TCGOp *op)
41
{
42
- return fold_const2(ctx, op);
43
+ if (fold_const2(ctx, op) ||
44
+ fold_xx_to_x(ctx, op)) {
45
+ return true;
46
+ }
47
+ return false;
48
}
49
50
static bool fold_andc(OptContext *ctx, TCGOp *op)
51
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
52
53
static bool fold_or(OptContext *ctx, TCGOp *op)
54
{
55
- return fold_const2(ctx, op);
56
+ if (fold_const2(ctx, op) ||
57
+ fold_xx_to_x(ctx, op)) {
58
+ return true;
59
+ }
60
+ return false;
61
}
62
63
static bool fold_orc(OptContext *ctx, TCGOp *op)
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
65
break;
66
}
67
68
- /* Simplify expression for "op r, a, a => mov r, a" cases */
69
- switch (opc) {
70
- CASE_OP_32_64_VEC(or):
71
- CASE_OP_32_64_VEC(and):
72
- if (args_are_copies(op->args[1], op->args[2])) {
73
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
74
- continue;
75
- }
76
- break;
77
- default:
78
- break;
79
- }
80
-
413
-
414
/* Reduce the size to 64-bit. */
415
mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
416
417
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
418
ret[1] = mop_2;
419
}
420
421
-#if TARGET_LONG_BITS == 64
422
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i64
423
-#else
424
-#define tcg_temp_ebb_new tcg_temp_ebb_new_i32
425
-#endif
426
-
427
static TCGv_i64 maybe_extend_addr64(TCGv addr)
428
{
429
#if TARGET_LONG_BITS == 32
430
@@ -XXX,XX +XXX,XX @@ static void maybe_free_addr64(TCGv_i64 a64)
431
#endif
432
}
433
434
-void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
435
+static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
436
+ TCGArg idx, MemOp memop)
437
{
438
- const MemOpIdx oi = make_memop_idx(memop, idx);
439
-
440
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
441
- tcg_debug_assert((memop & MO_SIGN) == 0);
442
+ const MemOpIdx orig_oi = make_memop_idx(memop, idx);
443
+ TCGv_i64 ext_addr = NULL;
444
445
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
446
447
/* TODO: For now, force 32-bit hosts to use the helper. */
448
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
449
TCGv_i64 lo, hi;
450
- TCGArg addr_arg;
451
- MemOpIdx adj_oi;
452
bool need_bswap = false;
453
+ MemOpIdx oi = orig_oi;
454
455
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
456
lo = TCGV128_HIGH(val);
457
hi = TCGV128_LOW(val);
458
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
459
+ oi = make_memop_idx(memop & ~MO_BSWAP, idx);
460
need_bswap = true;
461
} else {
462
lo = TCGV128_LOW(val);
463
hi = TCGV128_HIGH(val);
464
- adj_oi = oi;
465
}
466
467
-#if TARGET_LONG_BITS == 32
468
- addr_arg = tcgv_i32_arg(addr);
469
-#else
470
- addr_arg = tcgv_i64_arg(addr);
471
-#endif
472
- tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);
473
+ gen_ldst(INDEX_op_qemu_ld_i128, tcgv_i64_temp(lo),
474
+ tcgv_i64_temp(hi), addr, oi);
475
476
if (need_bswap) {
477
tcg_gen_bswap64_i64(lo, lo);
478
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
479
}
480
} else if (use_two_i64_for_i128(memop)) {
481
MemOp mop[2];
482
- TCGv addr_p8;
483
+ TCGTemp *addr_p8;
484
TCGv_i64 x, y;
485
+ MemOpIdx oi;
486
+ bool need_bswap;
487
488
canonicalize_memop_i128_as_i64(mop, memop);
489
+ need_bswap = (mop[0] ^ memop) & MO_BSWAP;
490
81
/*
491
/*
82
* Process each opcode.
492
* Since there are no global TCGv_i128, there is no visible state
83
* Sorted alphabetically by opcode as much as possible.
493
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
494
y = TCGV128_LOW(val);
495
}
496
497
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
498
+ oi = make_memop_idx(mop[0], idx);
499
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, oi);
500
501
- if ((mop[0] ^ memop) & MO_BSWAP) {
502
+ if (need_bswap) {
503
tcg_gen_bswap64_i64(x, x);
504
}
505
506
- addr_p8 = tcg_temp_ebb_new();
507
- tcg_gen_addi_tl(addr_p8, addr, 8);
508
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
509
- tcg_temp_free(addr_p8);
510
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
511
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
512
+ tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
513
+ addr_p8 = tcgv_i32_temp(t);
514
+ } else {
515
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
516
+ tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
517
+ addr_p8 = tcgv_i64_temp(t);
518
+ }
519
520
- if ((mop[0] ^ memop) & MO_BSWAP) {
521
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, oi);
522
+ tcg_temp_free_internal(addr_p8);
523
+
524
+ if (need_bswap) {
525
tcg_gen_bswap64_i64(y, y);
526
}
527
} else {
528
- TCGv_i64 a64 = maybe_extend_addr64(addr);
529
- gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
530
- maybe_free_addr64(a64);
531
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
532
+ ext_addr = tcg_temp_ebb_new_i64();
533
+ tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
534
+ addr = tcgv_i64_temp(ext_addr);
535
+ }
536
+ gen_helper_ld_i128(val, cpu_env, temp_tcgv_i64(addr),
537
+ tcg_constant_i32(orig_oi));
538
}
539
540
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_R);
541
+ plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
542
}
543
544
-void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
545
+void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
546
+ MemOp memop, TCGType addr_type)
547
{
548
- const MemOpIdx oi = make_memop_idx(memop, idx);
549
-
550
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
551
tcg_debug_assert((memop & MO_SIZE) == MO_128);
552
tcg_debug_assert((memop & MO_SIGN) == 0);
553
+ tcg_gen_qemu_ld_i128_int(val, addr, idx, memop);
554
+}
555
+
556
+static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
557
+ TCGArg idx, MemOp memop)
558
+{
559
+ const MemOpIdx orig_oi = make_memop_idx(memop, idx);
560
+ TCGv_i64 ext_addr = NULL;
561
562
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
563
564
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
565
566
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
567
TCGv_i64 lo, hi;
568
- TCGArg addr_arg;
569
- MemOpIdx adj_oi;
570
+ MemOpIdx oi = orig_oi;
571
bool need_bswap = false;
572
573
if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
574
- lo = tcg_temp_new_i64();
575
- hi = tcg_temp_new_i64();
576
+ lo = tcg_temp_ebb_new_i64();
577
+ hi = tcg_temp_ebb_new_i64();
578
tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
579
tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
580
- adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
581
+ oi = make_memop_idx(memop & ~MO_BSWAP, idx);
582
need_bswap = true;
583
} else {
584
lo = TCGV128_LOW(val);
585
hi = TCGV128_HIGH(val);
586
- adj_oi = oi;
587
}
588
589
-#if TARGET_LONG_BITS == 32
590
- addr_arg = tcgv_i32_arg(addr);
591
-#else
592
- addr_arg = tcgv_i64_arg(addr);
593
-#endif
594
- tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);
595
+ gen_ldst(INDEX_op_qemu_st_i128, tcgv_i64_temp(lo),
596
+ tcgv_i64_temp(hi), addr, oi);
597
598
if (need_bswap) {
599
tcg_temp_free_i64(lo);
600
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
601
}
602
} else if (use_two_i64_for_i128(memop)) {
603
MemOp mop[2];
604
- TCGv addr_p8;
605
- TCGv_i64 x, y;
606
+ TCGTemp *addr_p8;
607
+ TCGv_i64 x, y, b = NULL;
608
609
canonicalize_memop_i128_as_i64(mop, memop);
610
611
@@ -XXX,XX +XXX,XX @@ void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
612
y = TCGV128_LOW(val);
613
}
614
615
- addr_p8 = tcg_temp_ebb_new();
616
if ((mop[0] ^ memop) & MO_BSWAP) {
617
- TCGv_i64 t = tcg_temp_ebb_new_i64();
618
-
619
- tcg_gen_bswap64_i64(t, x);
620
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
621
- tcg_gen_bswap64_i64(t, y);
622
- tcg_gen_addi_tl(addr_p8, addr, 8);
623
- gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
624
- tcg_temp_free_i64(t);
625
- } else {
626
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
627
- tcg_gen_addi_tl(addr_p8, addr, 8);
628
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
629
+ b = tcg_temp_ebb_new_i64();
630
+ tcg_gen_bswap64_i64(b, x);
631
+ x = b;
632
}
633
- tcg_temp_free(addr_p8);
634
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr,
635
+ make_memop_idx(mop[0], idx));
636
+
637
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
638
+ TCGv_i32 t = tcg_temp_ebb_new_i32();
639
+ tcg_gen_addi_i32(t, temp_tcgv_i32(addr), 8);
640
+ addr_p8 = tcgv_i32_temp(t);
641
+ } else {
642
+ TCGv_i64 t = tcg_temp_ebb_new_i64();
643
+ tcg_gen_addi_i64(t, temp_tcgv_i64(addr), 8);
644
+ addr_p8 = tcgv_i64_temp(t);
645
+ }
646
+
647
+ if (b) {
648
+ tcg_gen_bswap64_i64(b, y);
649
+ y = b;
650
+ }
651
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8,
652
+ make_memop_idx(mop[1], idx));
653
+
654
+ if (b) {
655
+ tcg_temp_free_i64(b);
656
+ }
657
+ tcg_temp_free_internal(addr_p8);
658
} else {
659
- TCGv_i64 a64 = maybe_extend_addr64(addr);
660
- gen_helper_st_i128(cpu_env, a64, val, tcg_constant_i32(oi));
661
- maybe_free_addr64(a64);
662
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
663
+ ext_addr = tcg_temp_ebb_new_i64();
664
+ tcg_gen_extu_i32_i64(ext_addr, temp_tcgv_i32(addr));
665
+ addr = tcgv_i64_temp(ext_addr);
666
+ }
667
+ gen_helper_st_i128(cpu_env, temp_tcgv_i64(addr), val,
668
+ tcg_constant_i32(orig_oi));
669
}
670
671
- plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
672
+ plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_W);
673
+}
674
+
675
+void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
676
+ MemOp memop, TCGType addr_type)
677
+{
678
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
679
+ tcg_debug_assert((memop & MO_SIZE) == MO_128);
680
+ tcg_debug_assert((memop & MO_SIGN) == 0);
681
+ tcg_gen_qemu_st_i128_int(val, addr, idx, memop);
682
}
683
684
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
--
2.25.1

--
2.34.1
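As a side note on the *_chk conversion above: the shape is a thin public wrapper that asserts the translator's compiled-in address width against the per-TB tcg_ctx->addr_type, then forwards to an internal worker which widens 32-bit guest addresses where a 64-bit value is needed. A minimal sketch of that shape, using made-up names rather than the real TCG API:

/* Minimal sketch, not the real TCG API: invented names throughout. */
#include <assert.h>
#include <stdint.h>

typedef enum { ADDR_I32, ADDR_I64 } AddrType;

static AddrType cur_addr_type = ADDR_I64;  /* set once per translation */

/* Mirrors the idea of maybe_extend_addr64(): a 32-bit guest address is
 * zero-extended before being handed to a helper that wants 64 bits. */
static uint64_t widen_addr(uint64_t addr)
{
    return cur_addr_type == ADDR_I32 ? (uint32_t)addr : addr;
}

static uint64_t load_int(uint64_t addr)
{
    uint64_t a = widen_addr(addr);
    /* ... emit or perform the load at address 'a' ... */
    return a;
}

/* Mirrors the *_chk wrappers: the caller states which address width it
 * was generated for, so a mismatch asserts instead of miscompiling. */
uint64_t load_chk(uint64_t addr, AddrType addr_type)
{
    assert(addr_type == cur_addr_type);
    return load_int(addr);
}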
1
Split out a whole bunch of placeholder functions, which are
1
Expand from TCGv to TCGTemp inline in the translators,
2
currently identical. That won't last as more code gets moved.
2
and validate that the size matches tcg_ctx->addr_type.
3
4
Use CASE_32_64_VEC for some logical operators that previously
5
missed the addition of vectors.
6
3
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
6
---
11
tcg/optimize.c | 271 +++++++++++++++++++++++++++++++++++++++----------
7
include/tcg/tcg-op.h | 184 ++++++++++++++++++++++++++++++----------
12
1 file changed, 219 insertions(+), 52 deletions(-)
8
tcg/tcg-op-ldst.c | 198 ++++++++++++++++++++++++++++---------------
9
2 files changed, 267 insertions(+), 115 deletions(-)
13
10
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
15
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
13
--- a/include/tcg/tcg-op.h
17
+++ b/tcg/optimize.c
14
+++ b/include/tcg/tcg-op.h
18
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ tcg_gen_qemu_st_i128(TCGv_i128 v, TCGv a, TCGArg i, MemOp m)
19
}
16
tcg_gen_qemu_st_i128_chk(v, tcgv_tl_temp(a), i, m, TCG_TYPE_TL);
20
}
17
}
21
18
22
+/*
19
-void tcg_gen_atomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
23
+ * The fold_* functions return true when processing is complete,
20
- TCGArg, MemOp);
24
+ * usually by folding the operation to a constant or to a copy,
21
-void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
25
+ * and calling tcg_opt_gen_{mov,movi}. They may do other things,
22
- TCGArg, MemOp);
26
+ * like collect information about the value produced, for use in
23
-void tcg_gen_atomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
27
+ * optimizing a subsequent operation.
24
- TCGArg, MemOp);
28
+ *
25
+void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGv_i32,
29
+ * These first fold_* functions are all helpers, used by other
26
+ TCGArg, MemOp, TCGType);
30
+ * folders for more specific operations.
27
+void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGv_i64,
31
+ */
28
+ TCGArg, MemOp, TCGType);
32
+
29
+void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128,
33
+static bool fold_const1(OptContext *ctx, TCGOp *op)
30
+ TCGv_i128, TCGArg, MemOp, TCGType);
31
32
-void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGv_i32,
33
- TCGArg, MemOp);
34
-void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64,
35
- TCGArg, MemOp);
36
-void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128, TCGv, TCGv_i128, TCGv_i128,
37
- TCGArg, MemOp);
38
+void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32, TCGv_i32,
39
+ TCGArg, MemOp, TCGType);
40
+void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64, TCGv_i64,
41
+ TCGArg, MemOp, TCGType);
42
+void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128, TCGTemp *, TCGv_i128,
43
+ TCGv_i128, TCGArg, MemOp, TCGType);
44
45
-void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
46
-void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
47
+void tcg_gen_atomic_xchg_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
48
+ TCGArg, MemOp, TCGType);
49
+void tcg_gen_atomic_xchg_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
50
+ TCGArg, MemOp, TCGType);
51
52
-void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
53
-void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
54
-void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
55
-void tcg_gen_atomic_fetch_and_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
56
-void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
57
-void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
58
-void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
59
-void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
60
-void tcg_gen_atomic_fetch_smin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
61
-void tcg_gen_atomic_fetch_smin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
62
-void tcg_gen_atomic_fetch_umin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
63
-void tcg_gen_atomic_fetch_umin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
64
-void tcg_gen_atomic_fetch_smax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
65
-void tcg_gen_atomic_fetch_smax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
66
-void tcg_gen_atomic_fetch_umax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
67
-void tcg_gen_atomic_fetch_umax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
68
+void tcg_gen_atomic_fetch_add_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
69
+ TCGArg, MemOp, TCGType);
70
+void tcg_gen_atomic_fetch_add_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
71
+ TCGArg, MemOp, TCGType);
72
+void tcg_gen_atomic_fetch_and_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
73
+ TCGArg, MemOp, TCGType);
74
+void tcg_gen_atomic_fetch_and_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
75
+ TCGArg, MemOp, TCGType);
76
+void tcg_gen_atomic_fetch_or_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
77
+ TCGArg, MemOp, TCGType);
78
+void tcg_gen_atomic_fetch_or_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
79
+ TCGArg, MemOp, TCGType);
80
+void tcg_gen_atomic_fetch_xor_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
81
+ TCGArg, MemOp, TCGType);
82
+void tcg_gen_atomic_fetch_xor_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
83
+ TCGArg, MemOp, TCGType);
84
+void tcg_gen_atomic_fetch_smin_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
85
+ TCGArg, MemOp, TCGType);
86
+void tcg_gen_atomic_fetch_smin_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
87
+ TCGArg, MemOp, TCGType);
88
+void tcg_gen_atomic_fetch_umin_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
89
+ TCGArg, MemOp, TCGType);
90
+void tcg_gen_atomic_fetch_umin_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
91
+ TCGArg, MemOp, TCGType);
92
+void tcg_gen_atomic_fetch_smax_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
93
+ TCGArg, MemOp, TCGType);
94
+void tcg_gen_atomic_fetch_smax_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
95
+ TCGArg, MemOp, TCGType);
96
+void tcg_gen_atomic_fetch_umax_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
97
+ TCGArg, MemOp, TCGType);
98
+void tcg_gen_atomic_fetch_umax_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
99
+ TCGArg, MemOp, TCGType);
100
101
-void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
102
-void tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
103
-void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
104
-void tcg_gen_atomic_and_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
105
-void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
106
-void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
107
-void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
108
-void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
109
-void tcg_gen_atomic_smin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
110
-void tcg_gen_atomic_smin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
111
-void tcg_gen_atomic_umin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
112
-void tcg_gen_atomic_umin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
113
-void tcg_gen_atomic_smax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
114
-void tcg_gen_atomic_smax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
115
-void tcg_gen_atomic_umax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, MemOp);
116
-void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, MemOp);
117
+void tcg_gen_atomic_add_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
118
+ TCGArg, MemOp, TCGType);
119
+void tcg_gen_atomic_add_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
120
+ TCGArg, MemOp, TCGType);
121
+void tcg_gen_atomic_and_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
122
+ TCGArg, MemOp, TCGType);
123
+void tcg_gen_atomic_and_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
124
+ TCGArg, MemOp, TCGType);
125
+void tcg_gen_atomic_or_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
126
+ TCGArg, MemOp, TCGType);
127
+void tcg_gen_atomic_or_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
128
+ TCGArg, MemOp, TCGType);
129
+void tcg_gen_atomic_xor_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
130
+ TCGArg, MemOp, TCGType);
131
+void tcg_gen_atomic_xor_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
132
+ TCGArg, MemOp, TCGType);
133
+void tcg_gen_atomic_smin_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
134
+ TCGArg, MemOp, TCGType);
135
+void tcg_gen_atomic_smin_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
136
+ TCGArg, MemOp, TCGType);
137
+void tcg_gen_atomic_umin_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
138
+ TCGArg, MemOp, TCGType);
139
+void tcg_gen_atomic_umin_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
140
+ TCGArg, MemOp, TCGType);
141
+void tcg_gen_atomic_smax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
142
+ TCGArg, MemOp, TCGType);
143
+void tcg_gen_atomic_smax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
144
+ TCGArg, MemOp, TCGType);
145
+void tcg_gen_atomic_umax_fetch_i32_chk(TCGv_i32, TCGTemp *, TCGv_i32,
146
+ TCGArg, MemOp, TCGType);
147
+void tcg_gen_atomic_umax_fetch_i64_chk(TCGv_i64, TCGTemp *, TCGv_i64,
148
+ TCGArg, MemOp, TCGType);
149
+
150
+#define DEF_ATOMIC2(N, S) \
151
+ static inline void N##_##S(TCGv_##S r, TCGv a, TCGv_##S v, \
152
+ TCGArg i, MemOp m) \
153
+ { N##_##S##_chk(r, tcgv_tl_temp(a), v, i, m, TCG_TYPE_TL); }
154
+
155
+#define DEF_ATOMIC3(N, S) \
156
+ static inline void N##_##S(TCGv_##S r, TCGv a, TCGv_##S o, \
157
+ TCGv_##S n, TCGArg i, MemOp m) \
158
+ { N##_##S##_chk(r, tcgv_tl_temp(a), o, n, i, m, TCG_TYPE_TL); }
159
+
160
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i32)
161
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i64)
162
+DEF_ATOMIC3(tcg_gen_atomic_cmpxchg, i128)
163
+
164
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i32)
165
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i64)
166
+DEF_ATOMIC3(tcg_gen_nonatomic_cmpxchg, i128)
167
+
168
+DEF_ATOMIC2(tcg_gen_atomic_xchg, i32)
169
+DEF_ATOMIC2(tcg_gen_atomic_xchg, i64)
170
+
171
+DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i32)
172
+DEF_ATOMIC2(tcg_gen_atomic_fetch_add, i64)
173
+DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i32)
174
+DEF_ATOMIC2(tcg_gen_atomic_fetch_and, i64)
175
+DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i32)
176
+DEF_ATOMIC2(tcg_gen_atomic_fetch_or, i64)
177
+DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i32)
178
+DEF_ATOMIC2(tcg_gen_atomic_fetch_xor, i64)
179
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i32)
180
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smin, i64)
181
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umin, i32)
182
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umin, i64)
183
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smax, i32)
184
+DEF_ATOMIC2(tcg_gen_atomic_fetch_smax, i64)
185
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umax, i32)
186
+DEF_ATOMIC2(tcg_gen_atomic_fetch_umax, i64)
187
+
188
+DEF_ATOMIC2(tcg_gen_atomic_add_fetch, i32)
189
+DEF_ATOMIC2(tcg_gen_atomic_add_fetch, i64)
190
+DEF_ATOMIC2(tcg_gen_atomic_and_fetch, i32)
191
+DEF_ATOMIC2(tcg_gen_atomic_and_fetch, i64)
192
+DEF_ATOMIC2(tcg_gen_atomic_or_fetch, i32)
193
+DEF_ATOMIC2(tcg_gen_atomic_or_fetch, i64)
194
+DEF_ATOMIC2(tcg_gen_atomic_xor_fetch, i32)
195
+DEF_ATOMIC2(tcg_gen_atomic_xor_fetch, i64)
196
+DEF_ATOMIC2(tcg_gen_atomic_smin_fetch, i32)
197
+DEF_ATOMIC2(tcg_gen_atomic_smin_fetch, i64)
198
+DEF_ATOMIC2(tcg_gen_atomic_umin_fetch, i32)
199
+DEF_ATOMIC2(tcg_gen_atomic_umin_fetch, i64)
200
+DEF_ATOMIC2(tcg_gen_atomic_smax_fetch, i32)
201
+DEF_ATOMIC2(tcg_gen_atomic_smax_fetch, i64)
202
+DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i32)
203
+DEF_ATOMIC2(tcg_gen_atomic_umax_fetch, i64)
204
+
205
+#undef DEF_ATOMIC2
206
+#undef DEF_ATOMIC3
207
208
void tcg_gen_mov_vec(TCGv_vec, TCGv_vec);
209
void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32);
210
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
211
index XXXXXXX..XXXXXXX 100644
212
--- a/tcg/tcg-op-ldst.c
213
+++ b/tcg/tcg-op-ldst.c
214
@@ -XXX,XX +XXX,XX @@ static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
215
ret[1] = mop_2;
216
}
217
218
-static TCGv_i64 maybe_extend_addr64(TCGv addr)
219
+static TCGv_i64 maybe_extend_addr64(TCGTemp *addr)
220
{
221
-#if TARGET_LONG_BITS == 32
222
- TCGv_i64 a64 = tcg_temp_ebb_new_i64();
223
- tcg_gen_extu_i32_i64(a64, addr);
224
- return a64;
225
-#else
226
- return addr;
227
-#endif
228
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
229
+ TCGv_i64 a64 = tcg_temp_ebb_new_i64();
230
+ tcg_gen_extu_i32_i64(a64, temp_tcgv_i32(addr));
231
+ return a64;
232
+ }
233
+ return temp_tcgv_i64(addr);
234
}
235
236
static void maybe_free_addr64(TCGv_i64 a64)
237
{
238
-#if TARGET_LONG_BITS == 32
239
- tcg_temp_free_i64(a64);
240
-#endif
241
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
242
+ tcg_temp_free_i64(a64);
243
+ }
244
}
245
246
static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
247
@@ -XXX,XX +XXX,XX @@ static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
248
WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
249
};
250
251
-void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
252
- TCGv_i32 newv, TCGArg idx, MemOp memop)
253
+static void tcg_gen_nonatomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
254
+ TCGv_i32 cmpv, TCGv_i32 newv,
255
+ TCGArg idx, MemOp memop)
256
{
257
TCGv_i32 t1 = tcg_temp_ebb_new_i32();
258
TCGv_i32 t2 = tcg_temp_ebb_new_i32();
259
260
tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);
261
262
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
263
+ tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop & ~MO_SIGN);
264
tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
265
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
266
+ tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
267
tcg_temp_free_i32(t2);
268
269
if (memop & MO_SIGN) {
270
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
271
tcg_temp_free_i32(t1);
272
}
273
274
-void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
275
- TCGv_i32 newv, TCGArg idx, MemOp memop)
276
+void tcg_gen_nonatomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
277
+ TCGv_i32 cmpv, TCGv_i32 newv,
278
+ TCGArg idx, MemOp memop,
279
+ TCGType addr_type)
34
+{
280
+{
35
+ if (arg_is_const(op->args[1])) {
281
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
36
+ uint64_t t;
282
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
37
+
283
+ tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
38
+ t = arg_info(op->args[1])->val;
39
+ t = do_constant_folding(op->opc, t, 0);
40
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
41
+ }
42
+ return false;
43
+}
284
+}
44
+
285
+
45
+static bool fold_const2(OptContext *ctx, TCGOp *op)
286
+static void tcg_gen_atomic_cmpxchg_i32_int(TCGv_i32 retv, TCGTemp *addr,
287
+ TCGv_i32 cmpv, TCGv_i32 newv,
288
+ TCGArg idx, MemOp memop)
289
{
290
gen_atomic_cx_i32 gen;
291
TCGv_i64 a64;
292
MemOpIdx oi;
293
294
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
295
- tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
296
+ tcg_gen_nonatomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
297
return;
298
}
299
300
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
301
}
302
}
303
304
-void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
305
- TCGv_i64 newv, TCGArg idx, MemOp memop)
306
+void tcg_gen_atomic_cmpxchg_i32_chk(TCGv_i32 retv, TCGTemp *addr,
307
+ TCGv_i32 cmpv, TCGv_i32 newv,
308
+ TCGArg idx, MemOp memop,
309
+ TCGType addr_type)
46
+{
310
+{
47
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
311
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
48
+ uint64_t t1 = arg_info(op->args[1])->val;
312
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32);
49
+ uint64_t t2 = arg_info(op->args[2])->val;
313
+ tcg_gen_atomic_cmpxchg_i32_int(retv, addr, cmpv, newv, idx, memop);
50
+
51
+ t1 = do_constant_folding(op->opc, t1, t2);
52
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
53
+ }
54
+ return false;
55
+}
314
+}
56
+
315
+
57
+/*
316
+static void tcg_gen_nonatomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
58
+ * These outermost fold_<op> functions are sorted alphabetically.
317
+ TCGv_i64 cmpv, TCGv_i64 newv,
59
+ */
318
+ TCGArg idx, MemOp memop)
60
+
319
{
61
+static bool fold_add(OptContext *ctx, TCGOp *op)
320
TCGv_i64 t1, t2;
321
322
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
323
- tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
324
- TCGV_LOW(newv), idx, memop);
325
+ tcg_gen_nonatomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
326
+ TCGV_LOW(newv), idx, memop);
327
if (memop & MO_SIGN) {
328
tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
329
} else {
330
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
331
332
tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);
333
334
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
335
+ tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop & ~MO_SIGN);
336
tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
337
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
338
+ tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
339
tcg_temp_free_i64(t2);
340
341
if (memop & MO_SIGN) {
342
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
343
tcg_temp_free_i64(t1);
344
}
345
346
-void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
347
- TCGv_i64 newv, TCGArg idx, MemOp memop)
348
+void tcg_gen_nonatomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
349
+ TCGv_i64 cmpv, TCGv_i64 newv,
350
+ TCGArg idx, MemOp memop,
351
+ TCGType addr_type)
62
+{
352
+{
63
+ return fold_const2(ctx, op);
353
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
354
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
355
+ tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
64
+}
356
+}
65
+
357
+
66
+static bool fold_and(OptContext *ctx, TCGOp *op)
358
+static void tcg_gen_atomic_cmpxchg_i64_int(TCGv_i64 retv, TCGTemp *addr,
359
+ TCGv_i64 cmpv, TCGv_i64 newv,
360
+ TCGArg idx, MemOp memop)
361
{
362
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
363
- tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
364
+ tcg_gen_nonatomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
365
return;
366
}
367
368
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
369
}
370
371
if (TCG_TARGET_REG_BITS == 32) {
372
- tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
373
- TCGV_LOW(newv), idx, memop);
374
+ tcg_gen_atomic_cmpxchg_i32_int(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
375
+ TCGV_LOW(newv), idx, memop);
376
if (memop & MO_SIGN) {
377
tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
378
} else {
379
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
380
381
tcg_gen_extrl_i64_i32(c32, cmpv);
382
tcg_gen_extrl_i64_i32(n32, newv);
383
- tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
384
+ tcg_gen_atomic_cmpxchg_i32_int(r32, addr, c32, n32,
385
+ idx, memop & ~MO_SIGN);
386
tcg_temp_free_i32(c32);
387
tcg_temp_free_i32(n32);
388
389
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
390
}
391
}
392
393
-void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
394
- TCGv_i128 newv, TCGArg idx, MemOp memop)
395
+void tcg_gen_atomic_cmpxchg_i64_chk(TCGv_i64 retv, TCGTemp *addr,
396
+ TCGv_i64 cmpv, TCGv_i64 newv,
397
+ TCGArg idx, MemOp memop, TCGType addr_type)
67
+{
398
+{
68
+ return fold_const2(ctx, op);
399
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
400
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64);
401
+ tcg_gen_atomic_cmpxchg_i64_int(retv, addr, cmpv, newv, idx, memop);
69
+}
402
+}
70
+
403
+
71
+static bool fold_andc(OptContext *ctx, TCGOp *op)
404
+static void tcg_gen_nonatomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
405
+ TCGv_i128 cmpv, TCGv_i128 newv,
406
+ TCGArg idx, MemOp memop)
407
{
408
if (TCG_TARGET_REG_BITS == 32) {
409
/* Inline expansion below is simply too large for 32-bit hosts. */
410
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
411
? gen_helper_nonatomic_cmpxchgo_le
412
: gen_helper_nonatomic_cmpxchgo_be);
413
MemOpIdx oi = make_memop_idx(memop, idx);
414
- TCGv_i64 a64;
415
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
416
417
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
418
- tcg_debug_assert((memop & MO_SIGN) == 0);
419
-
420
- a64 = maybe_extend_addr64(addr);
421
gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
422
maybe_free_addr64(a64);
423
} else {
424
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
425
TCGv_i64 t1 = tcg_temp_ebb_new_i64();
426
TCGv_i64 z = tcg_constant_i64(0);
427
428
- tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);
429
+ tcg_gen_qemu_ld_i128_int(oldv, addr, idx, memop);
430
431
/* Compare i128 */
432
tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
433
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
434
TCGV128_HIGH(newv), TCGV128_HIGH(oldv));
435
436
/* Unconditional writeback. */
437
- tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
438
+ tcg_gen_qemu_st_i128_int(tmpv, addr, idx, memop);
439
tcg_gen_mov_i128(retv, oldv);
440
441
tcg_temp_free_i64(t0);
442
@@ -XXX,XX +XXX,XX @@ void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
443
}
444
}
445
446
-void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
447
- TCGv_i128 newv, TCGArg idx, MemOp memop)
448
+void tcg_gen_nonatomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
449
+ TCGv_i128 cmpv, TCGv_i128 newv,
450
+ TCGArg idx, MemOp memop,
451
+ TCGType addr_type)
72
+{
452
+{
73
+ return fold_const2(ctx, op);
453
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
454
+ tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
455
+ tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
74
+}
456
+}
75
+
457
+
76
static bool fold_call(OptContext *ctx, TCGOp *op)
458
+static void tcg_gen_atomic_cmpxchg_i128_int(TCGv_i128 retv, TCGTemp *addr,
77
{
459
+ TCGv_i128 cmpv, TCGv_i128 newv,
78
TCGContext *s = ctx->tcg;
460
+ TCGArg idx, MemOp memop)
79
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
461
{
80
return true;
462
gen_atomic_cx_i128 gen;
81
}
463
82
464
if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
83
+static bool fold_ctpop(OptContext *ctx, TCGOp *op)
465
- tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
466
+ tcg_gen_nonatomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
467
return;
468
}
469
470
- tcg_debug_assert((memop & MO_SIZE) == MO_128);
471
- tcg_debug_assert((memop & MO_SIGN) == 0);
472
gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
473
-
474
if (gen) {
475
MemOpIdx oi = make_memop_idx(memop, idx);
476
TCGv_i64 a64 = maybe_extend_addr64(addr);
477
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
478
tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
479
}
480
481
-static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
482
+void tcg_gen_atomic_cmpxchg_i128_chk(TCGv_i128 retv, TCGTemp *addr,
483
+ TCGv_i128 cmpv, TCGv_i128 newv,
484
+ TCGArg idx, MemOp memop,
485
+ TCGType addr_type)
84
+{
486
+{
85
+ return fold_const1(ctx, op);
487
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type);
488
+ tcg_debug_assert((memop & (MO_SIZE | MO_SIGN)) == MO_128);
489
+ tcg_gen_atomic_cmpxchg_i128_int(retv, addr, cmpv, newv, idx, memop);
86
+}
490
+}
87
+
491
+
88
+static bool fold_divide(OptContext *ctx, TCGOp *op)
492
+static void do_nonatomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
89
+{
493
TCGArg idx, MemOp memop, bool new_val,
90
+ return fold_const2(ctx, op);
494
void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
91
+}
495
{
92
+
496
@@ -XXX,XX +XXX,XX @@ static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
93
+static bool fold_eqv(OptContext *ctx, TCGOp *op)
497
94
+{
498
memop = tcg_canonicalize_memop(memop, 0, 0);
95
+ return fold_const2(ctx, op);
499
96
+}
500
- tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
97
+
501
+ tcg_gen_qemu_ld_i32_int(t1, addr, idx, memop);
98
+static bool fold_exts(OptContext *ctx, TCGOp *op)
502
tcg_gen_ext_i32(t2, val, memop);
99
+{
503
gen(t2, t1, t2);
100
+ return fold_const1(ctx, op);
504
- tcg_gen_qemu_st_i32(t2, addr, idx, memop);
101
+}
505
+ tcg_gen_qemu_st_i32_int(t2, addr, idx, memop);
102
+
506
103
+static bool fold_extu(OptContext *ctx, TCGOp *op)
507
tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
104
+{
508
tcg_temp_free_i32(t1);
105
+ return fold_const1(ctx, op);
509
tcg_temp_free_i32(t2);
106
+}
510
}
107
+
511
108
static bool fold_mb(OptContext *ctx, TCGOp *op)
512
-static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
109
{
513
+static void do_atomic_op_i32(TCGv_i32 ret, TCGTemp *addr, TCGv_i32 val,
110
/* Eliminate duplicate and redundant fence instructions. */
514
TCGArg idx, MemOp memop, void * const table[])
111
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
515
{
112
return true;
516
gen_atomic_op_i32 gen;
113
}
517
@@ -XXX,XX +XXX,XX @@ static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
114
518
}
115
+static bool fold_mul(OptContext *ctx, TCGOp *op)
519
}
116
+{
520
117
+ return fold_const2(ctx, op);
521
-static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
118
+}
522
+static void do_nonatomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
119
+
523
TCGArg idx, MemOp memop, bool new_val,
120
+static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
524
void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
121
+{
525
{
122
+ return fold_const2(ctx, op);
526
@@ -XXX,XX +XXX,XX @@ static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
123
+}
527
124
+
528
memop = tcg_canonicalize_memop(memop, 1, 0);
125
+static bool fold_nand(OptContext *ctx, TCGOp *op)
529
126
+{
530
- tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
127
+ return fold_const2(ctx, op);
531
+ tcg_gen_qemu_ld_i64_int(t1, addr, idx, memop);
128
+}
532
tcg_gen_ext_i64(t2, val, memop);
129
+
533
gen(t2, t1, t2);
130
+static bool fold_neg(OptContext *ctx, TCGOp *op)
534
- tcg_gen_qemu_st_i64(t2, addr, idx, memop);
131
+{
535
+ tcg_gen_qemu_st_i64_int(t2, addr, idx, memop);
132
+ return fold_const1(ctx, op);
536
133
+}
537
tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
134
+
538
tcg_temp_free_i64(t1);
135
+static bool fold_nor(OptContext *ctx, TCGOp *op)
539
tcg_temp_free_i64(t2);
136
+{
540
}
137
+ return fold_const2(ctx, op);
541
138
+}
542
-static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
139
+
543
+static void do_atomic_op_i64(TCGv_i64 ret, TCGTemp *addr, TCGv_i64 val,
140
+static bool fold_not(OptContext *ctx, TCGOp *op)
544
TCGArg idx, MemOp memop, void * const table[])
141
+{
545
{
142
+ return fold_const1(ctx, op);
546
memop = tcg_canonicalize_memop(memop, 1, 0);
143
+}
547
144
+
548
if ((memop & MO_SIZE) == MO_64) {
145
+static bool fold_or(OptContext *ctx, TCGOp *op)
549
-#ifdef CONFIG_ATOMIC64
146
+{
550
- gen_atomic_op_i64 gen;
147
+ return fold_const2(ctx, op);
551
- TCGv_i64 a64;
148
+}
552
- MemOpIdx oi;
149
+
553
+ gen_atomic_op_i64 gen = table[memop & (MO_SIZE | MO_BSWAP)];
150
+static bool fold_orc(OptContext *ctx, TCGOp *op)
554
151
+{
555
- gen = table[memop & (MO_SIZE | MO_BSWAP)];
152
+ return fold_const2(ctx, op);
556
- tcg_debug_assert(gen != NULL);
153
+}
557
+ if (gen) {
154
+
558
+ MemOpIdx oi = make_memop_idx(memop & ~MO_SIGN, idx);
155
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
559
+ TCGv_i64 a64 = maybe_extend_addr64(addr);
156
{
560
+ gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
157
/* Opcodes that touch guest memory stop the mb optimization. */
561
+ maybe_free_addr64(a64);
158
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
562
+ return;
159
return false;
563
+ }
160
}
564
161
565
- oi = make_memop_idx(memop & ~MO_SIGN, idx);
162
+static bool fold_remainder(OptContext *ctx, TCGOp *op)
566
- a64 = maybe_extend_addr64(addr);
163
+{
567
- gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
164
+ return fold_const2(ctx, op);
568
- maybe_free_addr64(a64);
165
+}
569
-#else
166
+
570
gen_helper_exit_atomic(cpu_env);
167
+static bool fold_shift(OptContext *ctx, TCGOp *op)
571
/* Produce a result, so that we have a well-formed opcode stream
168
+{
572
with respect to uses of the result in the (dead) code following. */
169
+ return fold_const2(ctx, op);
573
tcg_gen_movi_i64(ret, 0);
170
+}
574
-#endif /* CONFIG_ATOMIC64 */
171
+
575
} else {
172
+static bool fold_sub(OptContext *ctx, TCGOp *op)
576
TCGv_i32 v32 = tcg_temp_ebb_new_i32();
173
+{
577
TCGv_i32 r32 = tcg_temp_ebb_new_i32();
174
+ return fold_const2(ctx, op);
578
@@ -XXX,XX +XXX,XX @@ static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \
175
+}
579
WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \
176
+
580
WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \
177
+static bool fold_xor(OptContext *ctx, TCGOp *op)
581
}; \
178
+{
582
-void tcg_gen_atomic_##NAME##_i32 \
179
+ return fold_const2(ctx, op);
583
- (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \
180
+}
584
+void tcg_gen_atomic_##NAME##_i32_chk(TCGv_i32 ret, TCGTemp *addr, \
181
+
585
+ TCGv_i32 val, TCGArg idx, \
182
/* Propagate constants and copies, fold constant expressions. */
586
+ MemOp memop, TCGType addr_type) \
183
void tcg_optimize(TCGContext *s)
587
{ \
184
{
588
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type); \
185
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
589
+ tcg_debug_assert((memop & MO_SIZE) <= MO_32); \
186
}
590
if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
187
break;
591
do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \
188
592
} else { \
189
- CASE_OP_32_64(not):
593
@@ -XXX,XX +XXX,XX @@ void tcg_gen_atomic_##NAME##_i32 \
190
- CASE_OP_32_64(neg):
594
tcg_gen_##OP##_i32); \
191
- CASE_OP_32_64(ext8s):
595
} \
192
- CASE_OP_32_64(ext8u):
596
} \
193
- CASE_OP_32_64(ext16s):
597
-void tcg_gen_atomic_##NAME##_i64 \
194
- CASE_OP_32_64(ext16u):
598
- (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \
195
- CASE_OP_32_64(ctpop):
599
+void tcg_gen_atomic_##NAME##_i64_chk(TCGv_i64 ret, TCGTemp *addr, \
196
- case INDEX_op_ext32s_i64:
600
+ TCGv_i64 val, TCGArg idx, \
197
- case INDEX_op_ext32u_i64:
601
+ MemOp memop, TCGType addr_type) \
198
- case INDEX_op_ext_i32_i64:
602
{ \
199
- case INDEX_op_extu_i32_i64:
603
+ tcg_debug_assert(addr_type == tcg_ctx->addr_type); \
200
- case INDEX_op_extrl_i64_i32:
604
+ tcg_debug_assert((memop & MO_SIZE) <= MO_64); \
201
- case INDEX_op_extrh_i64_i32:
605
if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \
202
- if (arg_is_const(op->args[1])) {
606
do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \
203
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
607
} else { \
204
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
205
- continue;
206
- }
207
- break;
208
-
209
CASE_OP_32_64(bswap16):
210
CASE_OP_32_64(bswap32):
211
case INDEX_op_bswap64_i64:
212
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
213
}
214
break;
215
216
- CASE_OP_32_64(add):
217
- CASE_OP_32_64(sub):
218
- CASE_OP_32_64(mul):
219
- CASE_OP_32_64(or):
220
- CASE_OP_32_64(and):
221
- CASE_OP_32_64(xor):
222
- CASE_OP_32_64(shl):
223
- CASE_OP_32_64(shr):
224
- CASE_OP_32_64(sar):
225
- CASE_OP_32_64(rotl):
226
- CASE_OP_32_64(rotr):
227
- CASE_OP_32_64(andc):
228
- CASE_OP_32_64(orc):
229
- CASE_OP_32_64(eqv):
230
- CASE_OP_32_64(nand):
231
- CASE_OP_32_64(nor):
232
- CASE_OP_32_64(muluh):
233
- CASE_OP_32_64(mulsh):
234
- CASE_OP_32_64(div):
235
- CASE_OP_32_64(divu):
236
- CASE_OP_32_64(rem):
237
- CASE_OP_32_64(remu):
238
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
239
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
240
- arg_info(op->args[2])->val);
241
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
242
- continue;
243
- }
244
- break;
245
-
246
CASE_OP_32_64(clz):
247
CASE_OP_32_64(ctz):
248
if (arg_is_const(op->args[1])) {
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
250
}
251
break;
252
253
+ default:
254
+ break;
255
+
256
+ /* ---------------------------------------------------------- */
257
+ /* Sorted alphabetically by opcode as much as possible. */
258
+
259
+ CASE_OP_32_64_VEC(add):
260
+ done = fold_add(&ctx, op);
261
+ break;
262
+ CASE_OP_32_64_VEC(and):
263
+ done = fold_and(&ctx, op);
264
+ break;
265
+ CASE_OP_32_64_VEC(andc):
266
+ done = fold_andc(&ctx, op);
267
+ break;
268
+ CASE_OP_32_64(ctpop):
269
+ done = fold_ctpop(&ctx, op);
270
+ break;
271
+ CASE_OP_32_64(div):
272
+ CASE_OP_32_64(divu):
273
+ done = fold_divide(&ctx, op);
274
+ break;
275
+ CASE_OP_32_64(eqv):
276
+ done = fold_eqv(&ctx, op);
277
+ break;
278
+ CASE_OP_32_64(ext8s):
279
+ CASE_OP_32_64(ext16s):
280
+ case INDEX_op_ext32s_i64:
281
+ case INDEX_op_ext_i32_i64:
282
+ done = fold_exts(&ctx, op);
283
+ break;
284
+ CASE_OP_32_64(ext8u):
285
+ CASE_OP_32_64(ext16u):
286
+ case INDEX_op_ext32u_i64:
287
+ case INDEX_op_extu_i32_i64:
288
+ case INDEX_op_extrl_i64_i32:
289
+ case INDEX_op_extrh_i64_i32:
290
+ done = fold_extu(&ctx, op);
291
+ break;
292
case INDEX_op_mb:
293
done = fold_mb(&ctx, op);
294
break;
295
+ CASE_OP_32_64(mul):
296
+ done = fold_mul(&ctx, op);
297
+ break;
298
+ CASE_OP_32_64(mulsh):
299
+ CASE_OP_32_64(muluh):
300
+ done = fold_mul_highpart(&ctx, op);
301
+ break;
302
+ CASE_OP_32_64(nand):
303
+ done = fold_nand(&ctx, op);
304
+ break;
305
+ CASE_OP_32_64(neg):
306
+ done = fold_neg(&ctx, op);
307
+ break;
308
+ CASE_OP_32_64(nor):
309
+ done = fold_nor(&ctx, op);
310
+ break;
311
+ CASE_OP_32_64_VEC(not):
312
+ done = fold_not(&ctx, op);
313
+ break;
314
+ CASE_OP_32_64_VEC(or):
315
+ done = fold_or(&ctx, op);
316
+ break;
317
+ CASE_OP_32_64_VEC(orc):
318
+ done = fold_orc(&ctx, op);
319
+ break;
320
case INDEX_op_qemu_ld_i32:
321
case INDEX_op_qemu_ld_i64:
322
done = fold_qemu_ld(&ctx, op);
323
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
324
case INDEX_op_qemu_st_i64:
325
done = fold_qemu_st(&ctx, op);
326
break;
327
-
328
- default:
329
+ CASE_OP_32_64(rem):
330
+ CASE_OP_32_64(remu):
331
+ done = fold_remainder(&ctx, op);
332
+ break;
333
+ CASE_OP_32_64(rotl):
334
+ CASE_OP_32_64(rotr):
335
+ CASE_OP_32_64(sar):
336
+ CASE_OP_32_64(shl):
337
+ CASE_OP_32_64(shr):
338
+ done = fold_shift(&ctx, op);
339
+ break;
340
+ CASE_OP_32_64_VEC(sub):
341
+ done = fold_sub(&ctx, op);
342
+ break;
343
+ CASE_OP_32_64_VEC(xor):
344
+ done = fold_xor(&ctx, op);
345
break;
346
}
347
348
--
608
--
349
2.25.1
609
2.34.1
350
610
351
611
diff view generated by jsdifflib
1
Prepare for tracking different masks by renaming this one to z_mask.
1
For 32-bit hosts, we cannot simply rely on TCGContext.addr_bits,
2
as we need one or two host registers to represent the guest address.
3
4
Create the new opcodes and update all users. Since we have not
5
yet eliminated TARGET_LONG_BITS, only one of the two opcodes will
6
ever be used, so we can get away with treating them the same in
7
the backends.
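
As a minimal sketch of the resulting translation-time choice (the
helper name below is made up for illustration and is not part of this
patch; it assumes the usual TCG-internal declarations such as tcg_ctx
and the INDEX_op_* enumerators), the opcode is now picked from
tcg_ctx->addr_type rather than from the compile-time TARGET_LONG_BITS:

    static TCGOpcode qemu_ld_i32_opc(void)
    {
        /* Both variants always exist; select by guest address width. */
        return tcg_ctx->addr_type == TCG_TYPE_I32
               ? INDEX_op_qemu_ld_a32_i32
               : INDEX_op_qemu_ld_a64_i32;
    }

The open-coded equivalent of this selection appears in
tcg/tcg-op-ldst.c below.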
2
8
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
11
---
8
tcg/optimize.c | 142 +++++++++++++++++++++++++------------------------
12
include/tcg/tcg-opc.h | 35 ++++++++----
9
1 file changed, 72 insertions(+), 70 deletions(-)
13
tcg/optimize.c | 19 +++++--
14
tcg/tcg-op-ldst.c | 83 ++++++++++++++++++++++-------
15
tcg/tcg.c | 42 ++++++++++-----
16
tcg/tci.c | 32 +++++++----
17
tcg/aarch64/tcg-target.c.inc | 36 ++++++++-----
18
tcg/arm/tcg-target.c.inc | 83 +++++++++++++++--------------
19
tcg/i386/tcg-target.c.inc | 91 ++++++++++++++++++++------------
20
tcg/loongarch64/tcg-target.c.inc | 24 ++++++---
21
tcg/mips/tcg-target.c.inc | 66 ++++++++++++++---------
22
tcg/ppc/tcg-target.c.inc | 91 +++++++++++++++++++-------------
23
tcg/riscv/tcg-target.c.inc | 24 ++++++---
24
tcg/s390x/tcg-target.c.inc | 36 ++++++++-----
25
tcg/sparc64/tcg-target.c.inc | 24 ++++++---
26
tcg/tci/tcg-target.c.inc | 44 ++++++++-------
27
15 files changed, 468 insertions(+), 262 deletions(-)
10
28
29
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/include/tcg/tcg-opc.h
32
+++ b/include/tcg/tcg-opc.h
33
@@ -XXX,XX +XXX,XX @@ DEF(muls2_i64, 2, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muls2_i64))
34
DEF(muluh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_muluh_i64))
35
DEF(mulsh_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_mulsh_i64))
36
37
-#define TLADDR_ARGS (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? 1 : 2)
38
#define DATA64_ARGS (TCG_TARGET_REG_BITS == 64 ? 1 : 2)
39
40
/* QEMU specific */
41
@@ -XXX,XX +XXX,XX @@ DEF(goto_ptr, 0, 1, 0, TCG_OPF_BB_EXIT | TCG_OPF_BB_END)
42
DEF(plugin_cb_start, 0, 0, 3, TCG_OPF_NOT_PRESENT)
43
DEF(plugin_cb_end, 0, 0, 0, TCG_OPF_NOT_PRESENT)
44
45
-DEF(qemu_ld_i32, 1, TLADDR_ARGS, 1,
46
+/* Replicate ld/st ops for 32 and 64-bit guest addresses. */
47
+DEF(qemu_ld_a32_i32, 1, 1, 1,
48
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
49
-DEF(qemu_st_i32, 0, TLADDR_ARGS + 1, 1,
50
+DEF(qemu_st_a32_i32, 0, 1 + 1, 1,
51
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
52
-DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
53
+DEF(qemu_ld_a32_i64, DATA64_ARGS, 1, 1,
54
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
55
-DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
56
+DEF(qemu_st_a32_i64, 0, DATA64_ARGS + 1, 1,
57
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
58
+
59
+DEF(qemu_ld_a64_i32, 1, DATA64_ARGS, 1,
60
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
61
+DEF(qemu_st_a64_i32, 0, 1 + DATA64_ARGS, 1,
62
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
63
+DEF(qemu_ld_a64_i64, DATA64_ARGS, DATA64_ARGS, 1,
64
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
65
+DEF(qemu_st_a64_i64, 0, DATA64_ARGS + DATA64_ARGS, 1,
66
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
67
68
/* Only used by i386 to cope with stupid register constraints. */
69
-DEF(qemu_st8_i32, 0, TLADDR_ARGS + 1, 1,
70
+DEF(qemu_st8_a32_i32, 0, 1 + 1, 1,
71
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
72
+ IMPL(TCG_TARGET_HAS_qemu_st8_i32))
73
+DEF(qemu_st8_a64_i32, 0, 1 + DATA64_ARGS, 1,
74
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS |
75
IMPL(TCG_TARGET_HAS_qemu_st8_i32))
76
77
/* Only for 64-bit hosts at the moment. */
78
-DEF(qemu_ld_i128, 2, 1, 1,
79
+DEF(qemu_ld_a32_i128, 2, 1, 1,
80
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
81
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
82
-DEF(qemu_st_i128, 0, 3, 1,
83
+DEF(qemu_ld_a64_i128, 2, 1, 1,
84
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
85
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
86
+DEF(qemu_st_a32_i128, 0, 3, 1,
87
+ TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
88
+ IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
89
+DEF(qemu_st_a64_i128, 0, 3, 1,
90
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT |
91
IMPL(TCG_TARGET_HAS_qemu_ldst_i128))
92
93
@@ -XXX,XX +XXX,XX @@ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT)
94
DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT)
95
#endif
96
97
-#undef TLADDR_ARGS
98
#undef DATA64_ARGS
99
#undef IMPL
100
#undef IMPL64
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
101
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
102
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
103
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
104
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
105
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
16
TCGTemp *prev_copy;
106
CASE_OP_32_64_VEC(orc):
17
TCGTemp *next_copy;
107
done = fold_orc(&ctx, op);
18
uint64_t val;
108
break;
19
- uint64_t mask;
109
- case INDEX_op_qemu_ld_i32:
20
+ uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
110
- case INDEX_op_qemu_ld_i64:
21
} TempOptInfo;
111
+ case INDEX_op_qemu_ld_a32_i32:
22
112
+ case INDEX_op_qemu_ld_a64_i32:
23
static inline TempOptInfo *ts_info(TCGTemp *ts)
113
+ case INDEX_op_qemu_ld_a32_i64:
24
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
114
+ case INDEX_op_qemu_ld_a64_i64:
25
ti->next_copy = ts;
115
+ case INDEX_op_qemu_ld_a32_i128:
26
ti->prev_copy = ts;
116
+ case INDEX_op_qemu_ld_a64_i128:
27
ti->is_const = false;
117
done = fold_qemu_ld(&ctx, op);
28
- ti->mask = -1;
118
break;
29
+ ti->z_mask = -1;
119
- case INDEX_op_qemu_st_i32:
30
}
120
- case INDEX_op_qemu_st8_i32:
31
121
- case INDEX_op_qemu_st_i64:
32
static void reset_temp(TCGArg arg)
122
+ case INDEX_op_qemu_st8_a32_i32:
33
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
123
+ case INDEX_op_qemu_st8_a64_i32:
34
if (ts->kind == TEMP_CONST) {
124
+ case INDEX_op_qemu_st_a32_i32:
35
ti->is_const = true;
125
+ case INDEX_op_qemu_st_a64_i32:
36
ti->val = ts->val;
126
+ case INDEX_op_qemu_st_a32_i64:
37
- ti->mask = ts->val;
127
+ case INDEX_op_qemu_st_a64_i64:
38
+ ti->z_mask = ts->val;
128
+ case INDEX_op_qemu_st_a32_i128:
39
if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
129
+ case INDEX_op_qemu_st_a64_i128:
40
/* High bits of a 32-bit quantity are garbage. */
130
done = fold_qemu_st(&ctx, op);
41
- ti->mask |= ~0xffffffffull;
131
break;
42
+ ti->z_mask |= ~0xffffffffull;
132
CASE_OP_32_64(rem):
43
}
133
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/tcg/tcg-op-ldst.c
136
+++ b/tcg/tcg-op-ldst.c
137
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
138
MemOp orig_memop;
139
MemOpIdx orig_oi, oi;
140
TCGv_i64 copy_addr;
141
+ TCGOpcode opc;
142
143
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
144
orig_memop = memop = tcg_canonicalize_memop(memop, 0, 0);
145
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
146
}
147
148
copy_addr = plugin_maybe_preserve_addr(addr);
149
- gen_ldst(INDEX_op_qemu_ld_i32, tcgv_i32_temp(val), NULL, addr, oi);
150
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
151
+ opc = INDEX_op_qemu_ld_a32_i32;
152
+ } else {
153
+ opc = INDEX_op_qemu_ld_a64_i32;
154
+ }
155
+ gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
156
plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
157
158
if ((orig_memop ^ memop) & MO_BSWAP) {
159
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
160
}
161
162
if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
163
- opc = INDEX_op_qemu_st8_i32;
164
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
165
+ opc = INDEX_op_qemu_st8_a32_i32;
166
+ } else {
167
+ opc = INDEX_op_qemu_st8_a64_i32;
168
+ }
44
} else {
169
} else {
45
ti->is_const = false;
170
- opc = INDEX_op_qemu_st_i32;
46
- ti->mask = -1;
171
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
47
+ ti->z_mask = -1;
172
+ opc = INDEX_op_qemu_st_a32_i32;
173
+ } else {
174
+ opc = INDEX_op_qemu_st_a64_i32;
175
+ }
48
}
176
}
49
}
177
gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
50
178
plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
51
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
179
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
52
const TCGOpDef *def;
180
MemOp orig_memop;
53
TempOptInfo *di;
181
MemOpIdx orig_oi, oi;
54
TempOptInfo *si;
182
TCGv_i64 copy_addr;
55
- uint64_t mask;
183
+ TCGOpcode opc;
56
+ uint64_t z_mask;
184
57
TCGOpcode new_op;
185
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
58
186
tcg_gen_qemu_ld_i32_int(TCGV_LOW(val), addr, idx, memop);
59
if (ts_are_copies(dst_ts, src_ts)) {
187
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
60
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
61
op->args[0] = dst;
62
op->args[1] = src;
63
64
- mask = si->mask;
65
+ z_mask = si->z_mask;
66
if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
67
/* High bits of the destination are now garbage. */
68
- mask |= ~0xffffffffull;
69
+ z_mask |= ~0xffffffffull;
70
}
188
}
71
- di->mask = mask;
189
72
+ di->z_mask = z_mask;
190
copy_addr = plugin_maybe_preserve_addr(addr);
73
191
- gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, oi);
74
if (src_ts->type == dst_ts->type) {
192
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
75
TempOptInfo *ni = ts_info(si->next_copy);
193
+ opc = INDEX_op_qemu_ld_a32_i64;
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
194
+ } else {
195
+ opc = INDEX_op_qemu_ld_a64_i64;
196
+ }
197
+ gen_ldst_i64(opc, val, addr, oi);
198
plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
199
200
if ((orig_memop ^ memop) & MO_BSWAP) {
201
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
202
{
203
TCGv_i64 swap = NULL;
204
MemOpIdx orig_oi, oi;
205
+ TCGOpcode opc;
206
207
if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
208
tcg_gen_qemu_st_i32_int(TCGV_LOW(val), addr, idx, memop);
209
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
210
oi = make_memop_idx(memop, idx);
77
}
211
}
78
212
79
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
213
- gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, oi);
80
- uint64_t mask, partmask, affected, tmp;
214
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
81
+ uint64_t z_mask, partmask, affected, tmp;
215
+ opc = INDEX_op_qemu_st_a32_i64;
82
int nb_oargs, nb_iargs;
216
+ } else {
83
TCGOpcode opc = op->opc;
217
+ opc = INDEX_op_qemu_st_a64_i64;
84
const TCGOpDef *def = &tcg_op_defs[opc];
218
+ }
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
219
+ gen_ldst_i64(opc, val, addr, oi);
86
220
plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
87
/* Simplify using known-zero bits. Currently only ops with a single
221
88
output argument is supported. */
222
if (swap) {
89
- mask = -1;
223
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
90
+ z_mask = -1;
224
{
91
affected = -1;
225
const MemOpIdx orig_oi = make_memop_idx(memop, idx);
92
switch (opc) {
226
TCGv_i64 ext_addr = NULL;
93
CASE_OP_32_64(ext8s):
227
+ TCGOpcode opc;
94
- if ((arg_info(op->args[1])->mask & 0x80) != 0) {
228
95
+ if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
229
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
230
231
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
232
hi = TCGV128_HIGH(val);
233
}
234
235
- gen_ldst(INDEX_op_qemu_ld_i128, tcgv_i64_temp(lo),
236
- tcgv_i64_temp(hi), addr, oi);
237
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
238
+ opc = INDEX_op_qemu_ld_a32_i128;
239
+ } else {
240
+ opc = INDEX_op_qemu_ld_a64_i128;
241
+ }
242
+ gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
243
244
if (need_bswap) {
245
tcg_gen_bswap64_i64(lo, lo);
246
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
247
canonicalize_memop_i128_as_i64(mop, memop);
248
need_bswap = (mop[0] ^ memop) & MO_BSWAP;
249
250
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
251
+ opc = INDEX_op_qemu_ld_a32_i64;
252
+ } else {
253
+ opc = INDEX_op_qemu_ld_a64_i64;
254
+ }
255
+
256
/*
257
* Since there are no global TCGv_i128, there is no visible state
258
* changed if the second load faults. Load directly into the two
259
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
260
}
261
262
oi = make_memop_idx(mop[0], idx);
263
- gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, oi);
264
+ gen_ldst_i64(opc, x, addr, oi);
265
266
if (need_bswap) {
267
tcg_gen_bswap64_i64(x, x);
268
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
269
addr_p8 = tcgv_i64_temp(t);
270
}
271
272
- gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, oi);
273
+ gen_ldst_i64(opc, y, addr_p8, oi);
274
tcg_temp_free_internal(addr_p8);
275
276
if (need_bswap) {
277
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
278
{
279
const MemOpIdx orig_oi = make_memop_idx(memop, idx);
280
TCGv_i64 ext_addr = NULL;
281
+ TCGOpcode opc;
282
283
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
284
285
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
286
hi = TCGV128_HIGH(val);
287
}
288
289
- gen_ldst(INDEX_op_qemu_st_i128, tcgv_i64_temp(lo),
290
- tcgv_i64_temp(hi), addr, oi);
291
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
292
+ opc = INDEX_op_qemu_st_a32_i128;
293
+ } else {
294
+ opc = INDEX_op_qemu_st_a64_i128;
295
+ }
296
+ gen_ldst(opc, tcgv_i64_temp(lo), tcgv_i64_temp(hi), addr, oi);
297
298
if (need_bswap) {
299
tcg_temp_free_i64(lo);
300
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
301
302
canonicalize_memop_i128_as_i64(mop, memop);
303
304
+ if (tcg_ctx->addr_type == TCG_TYPE_I32) {
305
+ opc = INDEX_op_qemu_st_a32_i64;
306
+ } else {
307
+ opc = INDEX_op_qemu_st_a64_i64;
308
+ }
309
+
310
if ((memop & MO_BSWAP) == MO_LE) {
311
x = TCGV128_LOW(val);
312
y = TCGV128_HIGH(val);
313
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
314
tcg_gen_bswap64_i64(b, x);
315
x = b;
316
}
317
- gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr,
318
- make_memop_idx(mop[0], idx));
319
+
320
+ gen_ldst_i64(opc, x, addr, make_memop_idx(mop[0], idx));
321
322
if (tcg_ctx->addr_type == TCG_TYPE_I32) {
323
TCGv_i32 t = tcg_temp_ebb_new_i32();
324
@@ -XXX,XX +XXX,XX @@ static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
325
326
if (b) {
327
tcg_gen_bswap64_i64(b, y);
328
- y = b;
329
- }
330
- gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8,
331
- make_memop_idx(mop[1], idx));
332
-
333
- if (b) {
334
+ gen_ldst_i64(opc, b, addr_p8, make_memop_idx(mop[1], idx));
335
tcg_temp_free_i64(b);
336
+ } else {
337
+ gen_ldst_i64(opc, y, addr_p8, make_memop_idx(mop[1], idx));
338
}
339
tcg_temp_free_internal(addr_p8);
340
} else {
341
diff --git a/tcg/tcg.c b/tcg/tcg.c
342
index XXXXXXX..XXXXXXX 100644
343
--- a/tcg/tcg.c
344
+++ b/tcg/tcg.c
345
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
346
case INDEX_op_exit_tb:
347
case INDEX_op_goto_tb:
348
case INDEX_op_goto_ptr:
349
- case INDEX_op_qemu_ld_i32:
350
- case INDEX_op_qemu_st_i32:
351
- case INDEX_op_qemu_ld_i64:
352
- case INDEX_op_qemu_st_i64:
353
+ case INDEX_op_qemu_ld_a32_i32:
354
+ case INDEX_op_qemu_ld_a64_i32:
355
+ case INDEX_op_qemu_st_a32_i32:
356
+ case INDEX_op_qemu_st_a64_i32:
357
+ case INDEX_op_qemu_ld_a32_i64:
358
+ case INDEX_op_qemu_ld_a64_i64:
359
+ case INDEX_op_qemu_st_a32_i64:
360
+ case INDEX_op_qemu_st_a64_i64:
361
return true;
362
363
- case INDEX_op_qemu_st8_i32:
364
+ case INDEX_op_qemu_st8_a32_i32:
365
+ case INDEX_op_qemu_st8_a64_i32:
366
return TCG_TARGET_HAS_qemu_st8_i32;
367
368
- case INDEX_op_qemu_ld_i128:
369
- case INDEX_op_qemu_st_i128:
370
+ case INDEX_op_qemu_ld_a32_i128:
371
+ case INDEX_op_qemu_ld_a64_i128:
372
+ case INDEX_op_qemu_st_a32_i128:
373
+ case INDEX_op_qemu_st_a64_i128:
374
return TCG_TARGET_HAS_qemu_ldst_i128;
375
376
case INDEX_op_mov_i32:
377
@@ -XXX,XX +XXX,XX @@ static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
378
}
379
i = 1;
96
break;
380
break;
97
}
381
- case INDEX_op_qemu_ld_i32:
98
QEMU_FALLTHROUGH;
382
- case INDEX_op_qemu_st_i32:
99
CASE_OP_32_64(ext8u):
383
- case INDEX_op_qemu_st8_i32:
100
- mask = 0xff;
384
- case INDEX_op_qemu_ld_i64:
101
+ z_mask = 0xff;
385
- case INDEX_op_qemu_st_i64:
102
goto and_const;
386
- case INDEX_op_qemu_ld_i128:
103
CASE_OP_32_64(ext16s):
387
- case INDEX_op_qemu_st_i128:
104
- if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
388
+ case INDEX_op_qemu_ld_a32_i32:
105
+ if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
389
+ case INDEX_op_qemu_ld_a64_i32:
106
break;
390
+ case INDEX_op_qemu_st_a32_i32:
107
}
391
+ case INDEX_op_qemu_st_a64_i32:
108
QEMU_FALLTHROUGH;
392
+ case INDEX_op_qemu_st8_a32_i32:
109
CASE_OP_32_64(ext16u):
393
+ case INDEX_op_qemu_st8_a64_i32:
110
- mask = 0xffff;
394
+ case INDEX_op_qemu_ld_a32_i64:
111
+ z_mask = 0xffff;
395
+ case INDEX_op_qemu_ld_a64_i64:
112
goto and_const;
396
+ case INDEX_op_qemu_st_a32_i64:
113
case INDEX_op_ext32s_i64:
397
+ case INDEX_op_qemu_st_a64_i64:
114
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
398
+ case INDEX_op_qemu_ld_a32_i128:
115
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
399
+ case INDEX_op_qemu_ld_a64_i128:
116
break;
400
+ case INDEX_op_qemu_st_a32_i128:
117
}
401
+ case INDEX_op_qemu_st_a64_i128:
118
QEMU_FALLTHROUGH;
402
{
119
case INDEX_op_ext32u_i64:
403
const char *s_al, *s_op, *s_at;
120
- mask = 0xffffffffU;
404
MemOpIdx oi = op->args[k++];
121
+ z_mask = 0xffffffffU;
405
diff --git a/tcg/tci.c b/tcg/tci.c
122
goto and_const;
406
index XXXXXXX..XXXXXXX 100644
123
407
--- a/tcg/tci.c
124
CASE_OP_32_64(and):
408
+++ b/tcg/tci.c
125
- mask = arg_info(op->args[2])->mask;
409
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
126
+ z_mask = arg_info(op->args[2])->z_mask;
410
tb_ptr = ptr;
127
if (arg_is_const(op->args[2])) {
128
and_const:
129
- affected = arg_info(op->args[1])->mask & ~mask;
130
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
131
}
132
- mask = arg_info(op->args[1])->mask & mask;
133
+ z_mask = arg_info(op->args[1])->z_mask & z_mask;
134
break;
411
break;
135
412
136
case INDEX_op_ext_i32_i64:
413
- case INDEX_op_qemu_ld_i32:
137
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
414
+ case INDEX_op_qemu_ld_a32_i32:
138
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
415
+ case INDEX_op_qemu_ld_a64_i32:
139
break;
416
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
140
}
417
tci_args_rrm(insn, &r0, &r1, &oi);
141
QEMU_FALLTHROUGH;
418
taddr = regs[r1];
142
case INDEX_op_extu_i32_i64:
419
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
143
/* We do not compute affected as it is a size changing op. */
420
regs[r0] = tmp32;
144
- mask = (uint32_t)arg_info(op->args[1])->mask;
145
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
146
break;
421
break;
147
422
148
CASE_OP_32_64(andc):
423
- case INDEX_op_qemu_ld_i64:
149
/* Known-zeros does not imply known-ones. Therefore unless
424
+ case INDEX_op_qemu_ld_a32_i64:
150
op->args[2] is constant, we can't infer anything from it. */
425
+ case INDEX_op_qemu_ld_a64_i64:
151
if (arg_is_const(op->args[2])) {
426
if (TCG_TARGET_REG_BITS == 64) {
152
- mask = ~arg_info(op->args[2])->mask;
427
tci_args_rrm(insn, &r0, &r1, &oi);
153
+ z_mask = ~arg_info(op->args[2])->z_mask;
428
taddr = regs[r1];
154
goto and_const;
429
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
155
}
156
/* But we certainly know nothing outside args[1] may be set. */
157
- mask = arg_info(op->args[1])->mask;
158
+ z_mask = arg_info(op->args[1])->z_mask;
159
break;
160
161
case INDEX_op_sar_i32:
162
if (arg_is_const(op->args[2])) {
163
tmp = arg_info(op->args[2])->val & 31;
164
- mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
165
+ z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
166
}
430
}
167
break;
431
break;
168
case INDEX_op_sar_i64:
432
169
if (arg_is_const(op->args[2])) {
433
- case INDEX_op_qemu_st_i32:
170
tmp = arg_info(op->args[2])->val & 63;
434
+ case INDEX_op_qemu_st_a32_i32:
171
- mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
435
+ case INDEX_op_qemu_st_a64_i32:
172
+ z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
436
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
173
}
437
tci_args_rrm(insn, &r0, &r1, &oi);
438
taddr = regs[r1];
439
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
440
tci_qemu_st(env, taddr, tmp32, oi, tb_ptr);
174
break;
441
break;
175
442
176
case INDEX_op_shr_i32:
443
- case INDEX_op_qemu_st_i64:
177
if (arg_is_const(op->args[2])) {
444
+ case INDEX_op_qemu_st_a32_i64:
178
tmp = arg_info(op->args[2])->val & 31;
445
+ case INDEX_op_qemu_st_a64_i64:
179
- mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
446
if (TCG_TARGET_REG_BITS == 64) {
180
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
447
tci_args_rrm(insn, &r0, &r1, &oi);
181
}
448
taddr = regs[r1];
182
break;
449
@@ -XXX,XX +XXX,XX @@ int print_insn_tci(bfd_vma addr, disassemble_info *info)
183
case INDEX_op_shr_i64:
450
str_r(r3), str_r(r4), str_r(r5));
184
if (arg_is_const(op->args[2])) {
451
break;
185
tmp = arg_info(op->args[2])->val & 63;
452
186
- mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
453
- case INDEX_op_qemu_ld_i64:
187
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
454
- case INDEX_op_qemu_st_i64:
188
}
455
- len = DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
189
break;
456
+ case INDEX_op_qemu_ld_a32_i32:
190
457
+ case INDEX_op_qemu_st_a32_i32:
191
case INDEX_op_extrl_i64_i32:
458
+ len = 1 + 1;
192
- mask = (uint32_t)arg_info(op->args[1])->mask;
459
+ goto do_qemu_ldst;
193
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
460
+ case INDEX_op_qemu_ld_a32_i64:
194
break;
461
+ case INDEX_op_qemu_st_a32_i64:
195
case INDEX_op_extrh_i64_i32:
462
+ case INDEX_op_qemu_ld_a64_i32:
196
- mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
463
+ case INDEX_op_qemu_st_a64_i32:
197
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
464
+ len = 1 + DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
198
break;
465
+ goto do_qemu_ldst;
199
466
+ case INDEX_op_qemu_ld_a64_i64:
200
CASE_OP_32_64(shl):
467
+ case INDEX_op_qemu_st_a64_i64:
201
if (arg_is_const(op->args[2])) {
468
+ len = 2 * DIV_ROUND_UP(64, TCG_TARGET_REG_BITS);
202
tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
469
goto do_qemu_ldst;
203
- mask = arg_info(op->args[1])->mask << tmp;
470
- case INDEX_op_qemu_ld_i32:
204
+ z_mask = arg_info(op->args[1])->z_mask << tmp;
471
- case INDEX_op_qemu_st_i32:
205
}
472
- len = 1;
206
break;
473
do_qemu_ldst:
207
474
- len += DIV_ROUND_UP(TARGET_LONG_BITS, TCG_TARGET_REG_BITS);
208
CASE_OP_32_64(neg):
475
switch (len) {
209
/* Set to 1 all bits to the left of the rightmost. */
476
case 2:
210
- mask = -(arg_info(op->args[1])->mask
477
tci_args_rrm(insn, &r0, &r1, &oi);
211
- & -arg_info(op->args[1])->mask);
478
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
212
+ z_mask = -(arg_info(op->args[1])->z_mask
479
index XXXXXXX..XXXXXXX 100644
213
+ & -arg_info(op->args[1])->z_mask);
480
--- a/tcg/aarch64/tcg-target.c.inc
214
break;
481
+++ b/tcg/aarch64/tcg-target.c.inc
215
482
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
216
CASE_OP_32_64(deposit):
483
tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
217
- mask = deposit64(arg_info(op->args[1])->mask,
484
break;
218
- op->args[3], op->args[4],
485
219
- arg_info(op->args[2])->mask);
486
- case INDEX_op_qemu_ld_i32:
220
+ z_mask = deposit64(arg_info(op->args[1])->z_mask,
487
- case INDEX_op_qemu_ld_i64:
221
+ op->args[3], op->args[4],
488
+ case INDEX_op_qemu_ld_a32_i32:
222
+ arg_info(op->args[2])->z_mask);
489
+ case INDEX_op_qemu_ld_a64_i32:
223
break;
490
+ case INDEX_op_qemu_ld_a32_i64:
224
491
+ case INDEX_op_qemu_ld_a64_i64:
225
CASE_OP_32_64(extract):
492
tcg_out_qemu_ld(s, a0, a1, a2, ext);
226
- mask = extract64(arg_info(op->args[1])->mask,
493
break;
227
- op->args[2], op->args[3]);
494
- case INDEX_op_qemu_st_i32:
228
+ z_mask = extract64(arg_info(op->args[1])->z_mask,
495
- case INDEX_op_qemu_st_i64:
229
+ op->args[2], op->args[3]);
496
+ case INDEX_op_qemu_st_a32_i32:
230
if (op->args[2] == 0) {
497
+ case INDEX_op_qemu_st_a64_i32:
231
- affected = arg_info(op->args[1])->mask & ~mask;
498
+ case INDEX_op_qemu_st_a32_i64:
232
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
499
+ case INDEX_op_qemu_st_a64_i64:
233
}
500
tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
234
break;
501
break;
235
CASE_OP_32_64(sextract):
502
- case INDEX_op_qemu_ld_i128:
236
- mask = sextract64(arg_info(op->args[1])->mask,
503
+ case INDEX_op_qemu_ld_a32_i128:
237
- op->args[2], op->args[3]);
504
+ case INDEX_op_qemu_ld_a64_i128:
238
- if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
505
tcg_out_qemu_ld128(s, a0, a1, a2, args[3]);
239
- affected = arg_info(op->args[1])->mask & ~mask;
506
break;
240
+ z_mask = sextract64(arg_info(op->args[1])->z_mask,
507
- case INDEX_op_qemu_st_i128:
241
+ op->args[2], op->args[3]);
508
+ case INDEX_op_qemu_st_a32_i128:
242
+ if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
509
+ case INDEX_op_qemu_st_a64_i128:
243
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
510
tcg_out_qemu_st128(s, REG0(0), REG0(1), a2, args[3]);
244
}
511
break;
245
break;
512
246
513
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
247
CASE_OP_32_64(or):
514
case INDEX_op_movcond_i64:
248
CASE_OP_32_64(xor):
515
return C_O1_I4(r, r, rA, rZ, rZ);
249
- mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
516
250
+ z_mask = arg_info(op->args[1])->z_mask
517
- case INDEX_op_qemu_ld_i32:
251
+ | arg_info(op->args[2])->z_mask;
518
- case INDEX_op_qemu_ld_i64:
252
break;
519
+ case INDEX_op_qemu_ld_a32_i32:
253
520
+ case INDEX_op_qemu_ld_a64_i32:
254
case INDEX_op_clz_i32:
521
+ case INDEX_op_qemu_ld_a32_i64:
255
case INDEX_op_ctz_i32:
522
+ case INDEX_op_qemu_ld_a64_i64:
256
- mask = arg_info(op->args[2])->mask | 31;
523
return C_O1_I1(r, l);
257
+ z_mask = arg_info(op->args[2])->z_mask | 31;
524
- case INDEX_op_qemu_ld_i128:
258
break;
525
+ case INDEX_op_qemu_ld_a32_i128:
259
526
+ case INDEX_op_qemu_ld_a64_i128:
260
case INDEX_op_clz_i64:
527
return C_O2_I1(r, r, l);
261
case INDEX_op_ctz_i64:
528
- case INDEX_op_qemu_st_i32:
262
- mask = arg_info(op->args[2])->mask | 63;
529
- case INDEX_op_qemu_st_i64:
263
+ z_mask = arg_info(op->args[2])->z_mask | 63;
530
+ case INDEX_op_qemu_st_a32_i32:
264
break;
531
+ case INDEX_op_qemu_st_a64_i32:
265
532
+ case INDEX_op_qemu_st_a32_i64:
266
case INDEX_op_ctpop_i32:
533
+ case INDEX_op_qemu_st_a64_i64:
267
- mask = 32 | 31;
534
return C_O0_I2(lZ, l);
268
+ z_mask = 32 | 31;
535
- case INDEX_op_qemu_st_i128:
269
break;
536
+ case INDEX_op_qemu_st_a32_i128:
270
case INDEX_op_ctpop_i64:
537
+ case INDEX_op_qemu_st_a64_i128:
271
- mask = 64 | 63;
538
return C_O0_I3(lZ, lZ, l);
272
+ z_mask = 64 | 63;
539
273
break;
540
case INDEX_op_deposit_i32:
274
541
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
275
CASE_OP_32_64(setcond):
542
index XXXXXXX..XXXXXXX 100644
276
case INDEX_op_setcond2_i32:
543
--- a/tcg/arm/tcg-target.c.inc
277
- mask = 1;
544
+++ b/tcg/arm/tcg-target.c.inc
278
+ z_mask = 1;
545
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
279
break;
546
ARITH_MOV, args[0], 0, 0);
280
547
break;
281
CASE_OP_32_64(movcond):
548
282
- mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
549
- case INDEX_op_qemu_ld_i32:
283
+ z_mask = arg_info(op->args[3])->z_mask
550
- if (TARGET_LONG_BITS == 32) {
284
+ | arg_info(op->args[4])->z_mask;
551
- tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
285
break;
552
- args[2], TCG_TYPE_I32);
286
553
- } else {
287
CASE_OP_32_64(ld8u):
554
- tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
288
- mask = 0xff;
555
- args[3], TCG_TYPE_I32);
289
+ z_mask = 0xff;
556
- }
290
break;
557
+ case INDEX_op_qemu_ld_a32_i32:
291
CASE_OP_32_64(ld16u):
558
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
292
- mask = 0xffff;
559
break;
293
+ z_mask = 0xffff;
560
- case INDEX_op_qemu_ld_i64:
294
break;
561
- if (TARGET_LONG_BITS == 32) {
295
case INDEX_op_ld32u_i64:
562
- tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
296
- mask = 0xffffffffu;
563
- args[3], TCG_TYPE_I64);
297
+ z_mask = 0xffffffffu;
564
- } else {
298
break;
565
- tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
299
566
- args[4], TCG_TYPE_I64);
300
CASE_OP_32_64(qemu_ld):
567
- }
301
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
568
+ case INDEX_op_qemu_ld_a64_i32:
302
MemOpIdx oi = op->args[nb_oargs + nb_iargs];
569
+ tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
303
MemOp mop = get_memop(oi);
570
+ args[3], TCG_TYPE_I32);
304
if (!(mop & MO_SIGN)) {
571
break;
305
- mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
572
- case INDEX_op_qemu_st_i32:
306
+ z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
573
- if (TARGET_LONG_BITS == 32) {
307
}
574
- tcg_out_qemu_st(s, args[0], -1, args[1], -1,
308
}
575
- args[2], TCG_TYPE_I32);
309
break;
576
- } else {
310
577
- tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
311
CASE_OP_32_64(bswap16):
578
- args[3], TCG_TYPE_I32);
312
- mask = arg_info(op->args[1])->mask;
579
- }
313
- if (mask <= 0xffff) {
580
+ case INDEX_op_qemu_ld_a32_i64:
314
+ z_mask = arg_info(op->args[1])->z_mask;
581
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
315
+ if (z_mask <= 0xffff) {
582
+ args[3], TCG_TYPE_I64);
316
op->args[2] |= TCG_BSWAP_IZ;
583
break;
317
}
584
- case INDEX_op_qemu_st_i64:
318
- mask = bswap16(mask);
585
- if (TARGET_LONG_BITS == 32) {
319
+ z_mask = bswap16(z_mask);
586
- tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
320
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
587
- args[3], TCG_TYPE_I64);
321
case TCG_BSWAP_OZ:
588
- } else {
322
break;
589
- tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
323
case TCG_BSWAP_OS:
590
- args[4], TCG_TYPE_I64);
324
- mask = (int16_t)mask;
591
- }
325
+ z_mask = (int16_t)z_mask;
592
+ case INDEX_op_qemu_ld_a64_i64:
326
break;
593
+ tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
327
default: /* undefined high bits */
594
+ args[4], TCG_TYPE_I64);
328
- mask |= MAKE_64BIT_MASK(16, 48);
595
+ break;
329
+ z_mask |= MAKE_64BIT_MASK(16, 48);
596
+
330
break;
597
+ case INDEX_op_qemu_st_a32_i32:
331
}
598
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
332
break;
599
+ break;
333
600
+ case INDEX_op_qemu_st_a64_i32:
334
case INDEX_op_bswap32_i64:
601
+ tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
335
- mask = arg_info(op->args[1])->mask;
602
+ args[3], TCG_TYPE_I32);
336
- if (mask <= 0xffffffffu) {
603
+ break;
337
+ z_mask = arg_info(op->args[1])->z_mask;
604
+ case INDEX_op_qemu_st_a32_i64:
338
+ if (z_mask <= 0xffffffffu) {
605
+ tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
339
op->args[2] |= TCG_BSWAP_IZ;
606
+ args[3], TCG_TYPE_I64);
340
}
607
+ break;
341
- mask = bswap32(mask);
608
+ case INDEX_op_qemu_st_a64_i64:
342
+ z_mask = bswap32(z_mask);
609
+ tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
343
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
610
+ args[4], TCG_TYPE_I64);
344
case TCG_BSWAP_OZ:
611
break;
345
break;
612
346
case TCG_BSWAP_OS:
613
case INDEX_op_bswap16_i32:
347
- mask = (int32_t)mask;
614
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
348
+ z_mask = (int32_t)z_mask;
615
case INDEX_op_setcond2_i32:
349
break;
616
return C_O1_I4(r, r, r, rI, rI);
350
default: /* undefined high bits */
617
351
- mask |= MAKE_64BIT_MASK(32, 32);
618
- case INDEX_op_qemu_ld_i32:
352
+ z_mask |= MAKE_64BIT_MASK(32, 32);
619
- return TARGET_LONG_BITS == 32 ? C_O1_I1(r, q) : C_O1_I2(r, q, q);
353
break;
620
- case INDEX_op_qemu_ld_i64:
354
}
621
- return TARGET_LONG_BITS == 32 ? C_O2_I1(e, p, q) : C_O2_I2(e, p, q, q);
355
break;
622
- case INDEX_op_qemu_st_i32:
356
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
623
- return TARGET_LONG_BITS == 32 ? C_O0_I2(q, q) : C_O0_I3(q, q, q);
357
/* 32-bit ops generate 32-bit results. For the result is zero test
624
- case INDEX_op_qemu_st_i64:
358
below, we can ignore high bits, but for further optimizations we
625
- return TARGET_LONG_BITS == 32 ? C_O0_I3(Q, p, q) : C_O0_I4(Q, p, q, q);
359
need to record that the high bits contain garbage. */
626
+ case INDEX_op_qemu_ld_a32_i32:
360
- partmask = mask;
627
+ return C_O1_I1(r, q);
361
+ partmask = z_mask;
628
+ case INDEX_op_qemu_ld_a64_i32:
362
if (!(def->flags & TCG_OPF_64BIT)) {
629
+ return C_O1_I2(r, q, q);
363
- mask |= ~(tcg_target_ulong)0xffffffffu;
630
+ case INDEX_op_qemu_ld_a32_i64:
364
+ z_mask |= ~(tcg_target_ulong)0xffffffffu;
631
+ return C_O2_I1(e, p, q);
365
partmask &= 0xffffffffu;
632
+ case INDEX_op_qemu_ld_a64_i64:
366
affected &= 0xffffffffu;
633
+ return C_O2_I2(e, p, q, q);
367
}
634
+ case INDEX_op_qemu_st_a32_i32:
368
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
635
+ return C_O0_I2(q, q);
369
vs the high word of the input. */
636
+ case INDEX_op_qemu_st_a64_i32:
370
do_setcond_high:
637
+ return C_O0_I3(q, q, q);
371
reset_temp(op->args[0]);
638
+ case INDEX_op_qemu_st_a32_i64:
372
- arg_info(op->args[0])->mask = 1;
639
+ return C_O0_I3(Q, p, q);
373
+ arg_info(op->args[0])->z_mask = 1;
640
+ case INDEX_op_qemu_st_a64_i64:
374
op->opc = INDEX_op_setcond_i32;
641
+ return C_O0_I4(Q, p, q, q);
375
op->args[1] = op->args[2];
642
376
op->args[2] = op->args[4];
643
case INDEX_op_st_vec:
377
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
644
return C_O0_I2(w, r);
378
}
645
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
379
do_setcond_low:
646
index XXXXXXX..XXXXXXX 100644
380
reset_temp(op->args[0]);
647
--- a/tcg/i386/tcg-target.c.inc
381
- arg_info(op->args[0])->mask = 1;
648
+++ b/tcg/i386/tcg-target.c.inc
382
+ arg_info(op->args[0])->z_mask = 1;
649
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
383
op->opc = INDEX_op_setcond_i32;
650
tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, a0);
384
op->args[2] = op->args[3];
651
break;
385
op->args[3] = op->args[5];
652
386
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
653
- case INDEX_op_qemu_ld_i32:
387
/* Default case: we know nothing about operation (or were unable
654
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
388
to compute the operation result) so no propagation is done.
655
- tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
389
We trash everything if the operation is the end of a basic
656
- } else {
390
- block, otherwise we only trash the output args. "mask" is
657
+ case INDEX_op_qemu_ld_a64_i32:
391
+ block, otherwise we only trash the output args. "z_mask" is
658
+ if (TCG_TARGET_REG_BITS == 32) {
392
the non-zero bits mask for the first output arg. */
659
tcg_out_qemu_ld(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
393
if (def->flags & TCG_OPF_BB_END) {
660
+ break;
394
memset(&temps_used, 0, sizeof(temps_used));
661
}
395
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
662
+ /* fall through */
396
/* Save the corresponding known-zero bits mask for the
663
+ case INDEX_op_qemu_ld_a32_i32:
397
first output argument (only one supported so far). */
664
+ tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
398
if (i == 0) {
665
break;
399
- arg_info(op->args[i])->mask = mask;
666
- case INDEX_op_qemu_ld_i64:
400
+ arg_info(op->args[i])->z_mask = z_mask;
667
+ case INDEX_op_qemu_ld_a32_i64:
401
}
668
if (TCG_TARGET_REG_BITS == 64) {
402
}
669
tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
403
}
670
- } else if (TARGET_LONG_BITS == 32) {
671
+ } else {
672
tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
673
+ }
674
+ break;
675
+ case INDEX_op_qemu_ld_a64_i64:
676
+ if (TCG_TARGET_REG_BITS == 64) {
677
+ tcg_out_qemu_ld(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
678
} else {
679
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
680
}
681
break;
682
- case INDEX_op_qemu_ld_i128:
683
+ case INDEX_op_qemu_ld_a32_i128:
684
+ case INDEX_op_qemu_ld_a64_i128:
685
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
686
tcg_out_qemu_ld(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
687
break;
688
- case INDEX_op_qemu_st_i32:
689
- case INDEX_op_qemu_st8_i32:
690
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
691
- tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
692
- } else {
693
+
694
+ case INDEX_op_qemu_st_a64_i32:
695
+ case INDEX_op_qemu_st8_a64_i32:
696
+ if (TCG_TARGET_REG_BITS == 32) {
697
tcg_out_qemu_st(s, a0, -1, a1, a2, args[3], TCG_TYPE_I32);
698
+ break;
699
}
700
+ /* fall through */
701
+ case INDEX_op_qemu_st_a32_i32:
702
+ case INDEX_op_qemu_st8_a32_i32:
703
+ tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I32);
704
break;
705
- case INDEX_op_qemu_st_i64:
706
+ case INDEX_op_qemu_st_a32_i64:
707
if (TCG_TARGET_REG_BITS == 64) {
708
tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
709
- } else if (TARGET_LONG_BITS == 32) {
710
+ } else {
711
tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I64);
712
+ }
713
+ break;
714
+ case INDEX_op_qemu_st_a64_i64:
715
+ if (TCG_TARGET_REG_BITS == 64) {
716
+ tcg_out_qemu_st(s, a0, -1, a1, -1, a2, TCG_TYPE_I64);
717
} else {
718
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
719
}
720
break;
721
- case INDEX_op_qemu_st_i128:
722
+ case INDEX_op_qemu_st_a32_i128:
723
+ case INDEX_op_qemu_st_a64_i128:
724
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
725
tcg_out_qemu_st(s, a0, a1, a2, -1, args[3], TCG_TYPE_I128);
726
break;
727
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
728
case INDEX_op_clz_i64:
729
return have_lzcnt ? C_N1_I2(r, r, rW) : C_N1_I2(r, r, r);
730
731
- case INDEX_op_qemu_ld_i32:
732
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
733
- ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
734
+ case INDEX_op_qemu_ld_a32_i32:
735
+ return C_O1_I1(r, L);
736
+ case INDEX_op_qemu_ld_a64_i32:
737
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O1_I2(r, L, L);
738
739
- case INDEX_op_qemu_st_i32:
740
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
741
- ? C_O0_I2(L, L) : C_O0_I3(L, L, L));
742
- case INDEX_op_qemu_st8_i32:
743
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
744
- ? C_O0_I2(s, L) : C_O0_I3(s, L, L));
745
+ case INDEX_op_qemu_st_a32_i32:
746
+ return C_O0_I2(L, L);
747
+ case INDEX_op_qemu_st_a64_i32:
748
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
749
+ case INDEX_op_qemu_st8_a32_i32:
750
+ return C_O0_I2(s, L);
751
+ case INDEX_op_qemu_st8_a64_i32:
752
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(s, L) : C_O0_I3(s, L, L);
753
754
- case INDEX_op_qemu_ld_i64:
755
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
756
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, L)
757
- : C_O2_I2(r, r, L, L));
758
+ case INDEX_op_qemu_ld_a32_i64:
759
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I1(r, r, L);
760
+ case INDEX_op_qemu_ld_a64_i64:
761
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L) : C_O2_I2(r, r, L, L);
762
763
- case INDEX_op_qemu_st_i64:
764
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L)
765
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(L, L, L)
766
- : C_O0_I4(L, L, L, L));
767
+ case INDEX_op_qemu_st_a32_i64:
768
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I3(L, L, L);
769
+ case INDEX_op_qemu_st_a64_i64:
770
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(L, L) : C_O0_I4(L, L, L, L);
771
772
- case INDEX_op_qemu_ld_i128:
773
+ case INDEX_op_qemu_ld_a32_i128:
774
+ case INDEX_op_qemu_ld_a64_i128:
775
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
776
return C_O2_I1(r, r, L);
777
- case INDEX_op_qemu_st_i128:
778
+ case INDEX_op_qemu_st_a32_i128:
779
+ case INDEX_op_qemu_st_a64_i128:
780
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
781
return C_O0_I3(L, L, L);
782
783
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
784
index XXXXXXX..XXXXXXX 100644
785
--- a/tcg/loongarch64/tcg-target.c.inc
786
+++ b/tcg/loongarch64/tcg-target.c.inc
787
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
788
tcg_out_ldst(s, OPC_ST_D, a0, a1, a2);
789
break;
790
791
- case INDEX_op_qemu_ld_i32:
792
+ case INDEX_op_qemu_ld_a32_i32:
793
+ case INDEX_op_qemu_ld_a64_i32:
794
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
795
break;
796
- case INDEX_op_qemu_ld_i64:
797
+ case INDEX_op_qemu_ld_a32_i64:
798
+ case INDEX_op_qemu_ld_a64_i64:
799
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
800
break;
801
- case INDEX_op_qemu_st_i32:
802
+ case INDEX_op_qemu_st_a32_i32:
803
+ case INDEX_op_qemu_st_a64_i32:
804
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
805
break;
806
- case INDEX_op_qemu_st_i64:
807
+ case INDEX_op_qemu_st_a32_i64:
808
+ case INDEX_op_qemu_st_a64_i64:
809
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
810
break;
811
812
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
813
case INDEX_op_st32_i64:
814
case INDEX_op_st_i32:
815
case INDEX_op_st_i64:
816
- case INDEX_op_qemu_st_i32:
817
- case INDEX_op_qemu_st_i64:
818
+ case INDEX_op_qemu_st_a32_i32:
819
+ case INDEX_op_qemu_st_a64_i32:
820
+ case INDEX_op_qemu_st_a32_i64:
821
+ case INDEX_op_qemu_st_a64_i64:
822
return C_O0_I2(rZ, r);
823
824
case INDEX_op_brcond_i32:
825
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
826
case INDEX_op_ld32u_i64:
827
case INDEX_op_ld_i32:
828
case INDEX_op_ld_i64:
829
- case INDEX_op_qemu_ld_i32:
830
- case INDEX_op_qemu_ld_i64:
831
+ case INDEX_op_qemu_ld_a32_i32:
832
+ case INDEX_op_qemu_ld_a64_i32:
833
+ case INDEX_op_qemu_ld_a32_i64:
834
+ case INDEX_op_qemu_ld_a64_i64:
835
return C_O1_I1(r, r);
836
837
case INDEX_op_andc_i32:
838
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
839
index XXXXXXX..XXXXXXX 100644
840
--- a/tcg/mips/tcg-target.c.inc
841
+++ b/tcg/mips/tcg-target.c.inc
842
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
843
tcg_out_setcond2(s, args[5], a0, a1, a2, args[3], args[4]);
844
break;
845
846
- case INDEX_op_qemu_ld_i32:
847
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
848
- tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
849
- } else {
850
+ case INDEX_op_qemu_ld_a64_i32:
851
+ if (TCG_TARGET_REG_BITS == 32) {
852
tcg_out_qemu_ld(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
853
+ break;
854
}
855
+ /* fall through */
856
+ case INDEX_op_qemu_ld_a32_i32:
857
+ tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
858
break;
859
- case INDEX_op_qemu_ld_i64:
860
+ case INDEX_op_qemu_ld_a32_i64:
861
if (TCG_TARGET_REG_BITS == 64) {
862
tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
863
- } else if (TARGET_LONG_BITS == 32) {
864
+ } else {
865
tcg_out_qemu_ld(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
866
+ }
867
+ break;
868
+ case INDEX_op_qemu_ld_a64_i64:
869
+ if (TCG_TARGET_REG_BITS == 64) {
870
+ tcg_out_qemu_ld(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
871
} else {
872
tcg_out_qemu_ld(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
873
}
874
break;
875
- case INDEX_op_qemu_st_i32:
876
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
877
- tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
878
- } else {
879
+
880
+ case INDEX_op_qemu_st_a64_i32:
881
+ if (TCG_TARGET_REG_BITS == 32) {
882
tcg_out_qemu_st(s, a0, 0, a1, a2, args[3], TCG_TYPE_I32);
883
+ break;
884
}
885
+ /* fall through */
886
+ case INDEX_op_qemu_st_a32_i32:
887
+ tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I32);
888
break;
889
- case INDEX_op_qemu_st_i64:
890
+ case INDEX_op_qemu_st_a32_i64:
891
if (TCG_TARGET_REG_BITS == 64) {
892
tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
893
- } else if (TARGET_LONG_BITS == 32) {
894
+ } else {
895
tcg_out_qemu_st(s, a0, a1, a2, 0, args[3], TCG_TYPE_I64);
896
+ }
897
+ break;
898
+ case INDEX_op_qemu_st_a64_i64:
899
+ if (TCG_TARGET_REG_BITS == 64) {
900
+ tcg_out_qemu_st(s, a0, 0, a1, 0, a2, TCG_TYPE_I64);
901
} else {
902
tcg_out_qemu_st(s, a0, a1, a2, args[3], args[4], TCG_TYPE_I64);
903
}
904
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
905
case INDEX_op_brcond2_i32:
906
return C_O0_I4(rZ, rZ, rZ, rZ);
907
908
- case INDEX_op_qemu_ld_i32:
909
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
910
- ? C_O1_I1(r, r) : C_O1_I2(r, r, r));
911
- case INDEX_op_qemu_st_i32:
912
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
913
- ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r));
914
- case INDEX_op_qemu_ld_i64:
915
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
916
- : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
917
- : C_O2_I2(r, r, r, r));
918
- case INDEX_op_qemu_st_i64:
919
+ case INDEX_op_qemu_ld_a32_i32:
920
+ return C_O1_I1(r, r);
921
+ case INDEX_op_qemu_ld_a64_i32:
922
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
923
+ case INDEX_op_qemu_st_a32_i32:
924
+ return C_O0_I2(rZ, r);
925
+ case INDEX_op_qemu_st_a64_i32:
926
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r);
927
+ case INDEX_op_qemu_ld_a32_i64:
928
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
929
+ case INDEX_op_qemu_ld_a64_i64:
930
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
931
+ case INDEX_op_qemu_st_a32_i64:
932
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r) : C_O0_I3(rZ, rZ, r);
933
+ case INDEX_op_qemu_st_a64_i64:
934
return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r)
935
- : TARGET_LONG_BITS == 32 ? C_O0_I3(rZ, rZ, r)
936
: C_O0_I4(rZ, rZ, r, r));
937
938
default:
939
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
940
index XXXXXXX..XXXXXXX 100644
941
--- a/tcg/ppc/tcg-target.c.inc
942
+++ b/tcg/ppc/tcg-target.c.inc
943
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
944
tcg_out32(s, MODUD | TAB(args[0], args[1], args[2]));
945
break;
946
947
- case INDEX_op_qemu_ld_i32:
948
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
949
- tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
950
- args[2], TCG_TYPE_I32);
951
- } else {
952
+ case INDEX_op_qemu_ld_a64_i32:
953
+ if (TCG_TARGET_REG_BITS == 32) {
954
tcg_out_qemu_ld(s, args[0], -1, args[1], args[2],
955
args[3], TCG_TYPE_I32);
956
+ break;
957
}
958
+ /* fall through */
959
+ case INDEX_op_qemu_ld_a32_i32:
960
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
961
break;
962
- case INDEX_op_qemu_ld_i64:
963
+ case INDEX_op_qemu_ld_a32_i64:
964
if (TCG_TARGET_REG_BITS == 64) {
965
tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
966
args[2], TCG_TYPE_I64);
967
- } else if (TARGET_LONG_BITS == 32) {
968
+ } else {
969
tcg_out_qemu_ld(s, args[0], args[1], args[2], -1,
970
args[3], TCG_TYPE_I64);
971
+ }
972
+ break;
973
+ case INDEX_op_qemu_ld_a64_i64:
974
+ if (TCG_TARGET_REG_BITS == 64) {
975
+ tcg_out_qemu_ld(s, args[0], -1, args[1], -1,
976
+ args[2], TCG_TYPE_I64);
977
} else {
978
tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3],
979
args[4], TCG_TYPE_I64);
980
}
981
break;
982
- case INDEX_op_qemu_ld_i128:
983
+ case INDEX_op_qemu_ld_a32_i128:
984
+ case INDEX_op_qemu_ld_a64_i128:
985
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
986
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
987
break;
988
989
- case INDEX_op_qemu_st_i32:
990
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
991
- tcg_out_qemu_st(s, args[0], -1, args[1], -1,
992
- args[2], TCG_TYPE_I32);
993
- } else {
994
+ case INDEX_op_qemu_st_a64_i32:
995
+ if (TCG_TARGET_REG_BITS == 32) {
996
tcg_out_qemu_st(s, args[0], -1, args[1], args[2],
997
args[3], TCG_TYPE_I32);
998
+ break;
999
}
1000
+ /* fall through */
1001
+ case INDEX_op_qemu_st_a32_i32:
1002
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1, args[2], TCG_TYPE_I32);
1003
break;
1004
- case INDEX_op_qemu_st_i64:
1005
+ case INDEX_op_qemu_st_a32_i64:
1006
if (TCG_TARGET_REG_BITS == 64) {
1007
tcg_out_qemu_st(s, args[0], -1, args[1], -1,
1008
args[2], TCG_TYPE_I64);
1009
- } else if (TARGET_LONG_BITS == 32) {
1010
+ } else {
1011
tcg_out_qemu_st(s, args[0], args[1], args[2], -1,
1012
args[3], TCG_TYPE_I64);
1013
+ }
1014
+ break;
1015
+ case INDEX_op_qemu_st_a64_i64:
1016
+ if (TCG_TARGET_REG_BITS == 64) {
1017
+ tcg_out_qemu_st(s, args[0], -1, args[1], -1,
1018
+ args[2], TCG_TYPE_I64);
1019
} else {
1020
tcg_out_qemu_st(s, args[0], args[1], args[2], args[3],
1021
args[4], TCG_TYPE_I64);
1022
}
1023
break;
1024
- case INDEX_op_qemu_st_i128:
1025
+ case INDEX_op_qemu_st_a32_i128:
1026
+ case INDEX_op_qemu_st_a64_i128:
1027
tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
1028
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
1029
break;
1030
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1031
case INDEX_op_sub2_i32:
1032
return C_O2_I4(r, r, rI, rZM, r, r);
1033
1034
- case INDEX_op_qemu_ld_i32:
1035
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
1036
- ? C_O1_I1(r, r)
1037
- : C_O1_I2(r, r, r));
1038
-
1039
- case INDEX_op_qemu_st_i32:
1040
- return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
1041
- ? C_O0_I2(r, r)
1042
- : C_O0_I3(r, r, r));
1043
-
1044
- case INDEX_op_qemu_ld_i64:
1045
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
1046
- : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
1047
- : C_O2_I2(r, r, r, r));
1048
-
1049
- case INDEX_op_qemu_st_i64:
1050
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
1051
- : TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
1052
- : C_O0_I4(r, r, r, r));
1053
-
1054
- case INDEX_op_qemu_ld_i128:
1055
+ case INDEX_op_qemu_ld_a32_i32:
1056
+ return C_O1_I1(r, r);
1057
+ case INDEX_op_qemu_ld_a64_i32:
1058
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
1059
+ case INDEX_op_qemu_ld_a32_i64:
1060
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
1061
+ case INDEX_op_qemu_ld_a64_i64:
1062
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
1063
+ case INDEX_op_qemu_ld_a32_i128:
1064
+ case INDEX_op_qemu_ld_a64_i128:
1065
return C_O2_I1(o, m, r);
1066
- case INDEX_op_qemu_st_i128:
1067
+
1068
+ case INDEX_op_qemu_st_a32_i32:
1069
+ return C_O0_I2(r, r);
1070
+ case INDEX_op_qemu_st_a64_i32:
1071
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1072
+ case INDEX_op_qemu_st_a32_i64:
1073
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1074
+ case INDEX_op_qemu_st_a64_i64:
1075
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
1076
+ case INDEX_op_qemu_st_a32_i128:
1077
+ case INDEX_op_qemu_st_a64_i128:
1078
return C_O0_I3(o, m, r);
1079
1080
case INDEX_op_add_vec:
1081
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
1082
index XXXXXXX..XXXXXXX 100644
1083
--- a/tcg/riscv/tcg-target.c.inc
1084
+++ b/tcg/riscv/tcg-target.c.inc
1085
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1086
tcg_out_setcond(s, args[3], a0, a1, a2);
1087
break;
1088
1089
- case INDEX_op_qemu_ld_i32:
1090
+ case INDEX_op_qemu_ld_a32_i32:
1091
+ case INDEX_op_qemu_ld_a64_i32:
1092
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
1093
break;
1094
- case INDEX_op_qemu_ld_i64:
1095
+ case INDEX_op_qemu_ld_a32_i64:
1096
+ case INDEX_op_qemu_ld_a64_i64:
1097
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
1098
break;
1099
- case INDEX_op_qemu_st_i32:
1100
+ case INDEX_op_qemu_st_a32_i32:
1101
+ case INDEX_op_qemu_st_a64_i32:
1102
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
1103
break;
1104
- case INDEX_op_qemu_st_i64:
1105
+ case INDEX_op_qemu_st_a32_i64:
1106
+ case INDEX_op_qemu_st_a64_i64:
1107
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
1108
break;
1109
1110
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1111
case INDEX_op_sub2_i64:
1112
return C_O2_I4(r, r, rZ, rZ, rM, rM);
1113
1114
- case INDEX_op_qemu_ld_i32:
1115
- case INDEX_op_qemu_ld_i64:
1116
+ case INDEX_op_qemu_ld_a32_i32:
1117
+ case INDEX_op_qemu_ld_a64_i32:
1118
+ case INDEX_op_qemu_ld_a32_i64:
1119
+ case INDEX_op_qemu_ld_a64_i64:
1120
return C_O1_I1(r, r);
1121
- case INDEX_op_qemu_st_i32:
1122
- case INDEX_op_qemu_st_i64:
1123
+ case INDEX_op_qemu_st_a32_i32:
1124
+ case INDEX_op_qemu_st_a64_i32:
1125
+ case INDEX_op_qemu_st_a32_i64:
1126
+ case INDEX_op_qemu_st_a64_i64:
1127
return C_O0_I2(rZ, r);
1128
1129
default:
1130
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
1131
index XXXXXXX..XXXXXXX 100644
1132
--- a/tcg/s390x/tcg-target.c.inc
1133
+++ b/tcg/s390x/tcg-target.c.inc
1134
@@ -XXX,XX +XXX,XX @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1135
args[2], const_args[2], args[3], const_args[3], args[4]);
1136
break;
1137
1138
- case INDEX_op_qemu_ld_i32:
1139
+ case INDEX_op_qemu_ld_a32_i32:
1140
+ case INDEX_op_qemu_ld_a64_i32:
1141
tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32);
1142
break;
1143
- case INDEX_op_qemu_ld_i64:
1144
+ case INDEX_op_qemu_ld_a32_i64:
1145
+ case INDEX_op_qemu_ld_a64_i64:
1146
tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64);
1147
break;
1148
- case INDEX_op_qemu_st_i32:
1149
+ case INDEX_op_qemu_st_a32_i32:
1150
+ case INDEX_op_qemu_st_a64_i32:
1151
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32);
1152
break;
1153
- case INDEX_op_qemu_st_i64:
1154
+ case INDEX_op_qemu_st_a32_i64:
1155
+ case INDEX_op_qemu_st_a64_i64:
1156
tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
1157
break;
1158
- case INDEX_op_qemu_ld_i128:
1159
+ case INDEX_op_qemu_ld_a32_i128:
1160
+ case INDEX_op_qemu_ld_a64_i128:
1161
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], true);
1162
break;
1163
- case INDEX_op_qemu_st_i128:
1164
+ case INDEX_op_qemu_st_a32_i128:
1165
+ case INDEX_op_qemu_st_a64_i128:
1166
tcg_out_qemu_ldst_i128(s, args[0], args[1], args[2], args[3], false);
1167
break;
1168
1169
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1170
case INDEX_op_ctpop_i64:
1171
return C_O1_I1(r, r);
1172
1173
- case INDEX_op_qemu_ld_i32:
1174
- case INDEX_op_qemu_ld_i64:
1175
+ case INDEX_op_qemu_ld_a32_i32:
1176
+ case INDEX_op_qemu_ld_a64_i32:
1177
+ case INDEX_op_qemu_ld_a32_i64:
1178
+ case INDEX_op_qemu_ld_a64_i64:
1179
return C_O1_I1(r, r);
1180
- case INDEX_op_qemu_st_i64:
1181
- case INDEX_op_qemu_st_i32:
1182
+ case INDEX_op_qemu_st_a32_i64:
1183
+ case INDEX_op_qemu_st_a64_i64:
1184
+ case INDEX_op_qemu_st_a32_i32:
1185
+ case INDEX_op_qemu_st_a64_i32:
1186
return C_O0_I2(r, r);
1187
- case INDEX_op_qemu_ld_i128:
1188
+ case INDEX_op_qemu_ld_a32_i128:
1189
+ case INDEX_op_qemu_ld_a64_i128:
1190
return C_O2_I1(o, m, r);
1191
- case INDEX_op_qemu_st_i128:
1192
+ case INDEX_op_qemu_st_a32_i128:
1193
+ case INDEX_op_qemu_st_a64_i128:
1194
return C_O0_I3(o, m, r);
1195
1196
case INDEX_op_deposit_i32:
1197
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
1198
index XXXXXXX..XXXXXXX 100644
1199
--- a/tcg/sparc64/tcg-target.c.inc
1200
+++ b/tcg/sparc64/tcg-target.c.inc
1201
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1202
tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX);
1203
break;
1204
1205
- case INDEX_op_qemu_ld_i32:
1206
+ case INDEX_op_qemu_ld_a32_i32:
1207
+ case INDEX_op_qemu_ld_a64_i32:
1208
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
1209
break;
1210
- case INDEX_op_qemu_ld_i64:
1211
+ case INDEX_op_qemu_ld_a32_i64:
1212
+ case INDEX_op_qemu_ld_a64_i64:
1213
tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
1214
break;
1215
- case INDEX_op_qemu_st_i32:
1216
+ case INDEX_op_qemu_st_a32_i32:
1217
+ case INDEX_op_qemu_st_a64_i32:
1218
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
1219
break;
1220
- case INDEX_op_qemu_st_i64:
1221
+ case INDEX_op_qemu_st_a32_i64:
1222
+ case INDEX_op_qemu_st_a64_i64:
1223
tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
1224
break;
1225
1226
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1227
case INDEX_op_extu_i32_i64:
1228
case INDEX_op_extrl_i64_i32:
1229
case INDEX_op_extrh_i64_i32:
1230
- case INDEX_op_qemu_ld_i32:
1231
- case INDEX_op_qemu_ld_i64:
1232
+ case INDEX_op_qemu_ld_a32_i32:
1233
+ case INDEX_op_qemu_ld_a64_i32:
1234
+ case INDEX_op_qemu_ld_a32_i64:
1235
+ case INDEX_op_qemu_ld_a64_i64:
1236
return C_O1_I1(r, r);
1237
1238
case INDEX_op_st8_i32:
1239
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1240
case INDEX_op_st_i32:
1241
case INDEX_op_st32_i64:
1242
case INDEX_op_st_i64:
1243
- case INDEX_op_qemu_st_i32:
1244
- case INDEX_op_qemu_st_i64:
1245
+ case INDEX_op_qemu_st_a32_i32:
1246
+ case INDEX_op_qemu_st_a64_i32:
1247
+ case INDEX_op_qemu_st_a32_i64:
1248
+ case INDEX_op_qemu_st_a64_i64:
1249
return C_O0_I2(rZ, r);
1250
1251
case INDEX_op_add_i32:
1252
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
1253
index XXXXXXX..XXXXXXX 100644
1254
--- a/tcg/tci/tcg-target.c.inc
1255
+++ b/tcg/tci/tcg-target.c.inc
1256
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
1257
case INDEX_op_setcond2_i32:
1258
return C_O1_I4(r, r, r, r, r);
1259
1260
- case INDEX_op_qemu_ld_i32:
1261
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
1262
- ? C_O1_I1(r, r)
1263
- : C_O1_I2(r, r, r));
1264
- case INDEX_op_qemu_ld_i64:
1265
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
1266
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O2_I1(r, r, r)
1267
- : C_O2_I2(r, r, r, r));
1268
- case INDEX_op_qemu_st_i32:
1269
- return (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
1270
- ? C_O0_I2(r, r)
1271
- : C_O0_I3(r, r, r));
1272
- case INDEX_op_qemu_st_i64:
1273
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
1274
- : TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? C_O0_I3(r, r, r)
1275
- : C_O0_I4(r, r, r, r));
1276
+ case INDEX_op_qemu_ld_a32_i32:
1277
+ return C_O1_I1(r, r);
1278
+ case INDEX_op_qemu_ld_a64_i32:
1279
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O1_I2(r, r, r);
1280
+ case INDEX_op_qemu_ld_a32_i64:
1281
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I1(r, r, r);
1282
+ case INDEX_op_qemu_ld_a64_i64:
1283
+ return TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r) : C_O2_I2(r, r, r, r);
1284
+ case INDEX_op_qemu_st_a32_i32:
1285
+ return C_O0_I2(r, r);
1286
+ case INDEX_op_qemu_st_a64_i32:
1287
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1288
+ case INDEX_op_qemu_st_a32_i64:
1289
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I3(r, r, r);
1290
+ case INDEX_op_qemu_st_a64_i64:
1291
+ return TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r) : C_O0_I4(r, r, r, r);
1292
1293
default:
1294
g_assert_not_reached();
1295
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1296
tcg_out_op_rrrr(s, opc, args[0], args[1], args[2], args[3]);
1297
break;
1298
1299
- case INDEX_op_qemu_ld_i32:
1300
- case INDEX_op_qemu_st_i32:
1301
+ case INDEX_op_qemu_ld_a32_i32:
1302
+ case INDEX_op_qemu_ld_a64_i32:
1303
+ case INDEX_op_qemu_st_a32_i32:
1304
+ case INDEX_op_qemu_st_a64_i32:
1305
if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
1306
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
1307
} else {
1308
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1309
}
1310
break;
1311
1312
- case INDEX_op_qemu_ld_i64:
1313
- case INDEX_op_qemu_st_i64:
1314
+ case INDEX_op_qemu_ld_a32_i64:
1315
+ case INDEX_op_qemu_ld_a64_i64:
1316
+ case INDEX_op_qemu_st_a32_i64:
1317
+ case INDEX_op_qemu_st_a64_i64:
1318
if (TCG_TARGET_REG_BITS == 64) {
1319
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
1320
} else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
404
--
1321
--
405
2.25.1
1322
2.34.1
406
1323
407
1324
1
There was no real reason for calls to have separate code here.
1
We now have the address size as part of the opcode, so
2
Unify init for calls vs non-calls using the call path, which
2
we no longer need to test TARGET_LONG_BITS. We can use
3
handles TCG_CALL_DUMMY_ARG.
3
uint64_t for target_ulong, as passed into load/store helpers.
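As a standalone illustration (a minimal sketch, not code from this patch; narrow_a32 is a made-up helper): once the _a32_/_a64_ distinction is carried by the opcode, the interpreter can keep every guest address in a uint64_t and narrow 32-bit addresses with a plain cast instead of testing TARGET_LONG_BITS.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical helper: an _a32_ opcode narrows the register value
       to its low 32 bits; an _a64_ opcode uses it unchanged. */
    static uint64_t narrow_a32(uint64_t reg)
    {
        return (uint32_t)reg;
    }

    int main(void)
    {
        uint64_t reg = 0xdeadbeef00001234ull;
        printf("a64 address: 0x%016llx\n", (unsigned long long)reg);
        printf("a32 address: 0x%016llx\n", (unsigned long long)narrow_a32(reg));
        return 0;
    }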
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
7
---
10
tcg/optimize.c | 25 +++++++++++--------------
8
tcg/tci.c | 61 +++++++++++++++++++++++++---------------
11
1 file changed, 11 insertions(+), 14 deletions(-)
9
tcg/tci/tcg-target.c.inc | 15 +++++-----
10
2 files changed, 46 insertions(+), 30 deletions(-)
12
11
13
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/tci.c b/tcg/tci.c
14
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/optimize.c
14
--- a/tcg/tci.c
16
+++ b/tcg/optimize.c
15
+++ b/tcg/tci.c
17
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
16
@@ -XXX,XX +XXX,XX @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
17
return result;
18
}
19
20
-static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
21
+static uint64_t tci_qemu_ld(CPUArchState *env, uint64_t taddr,
22
MemOpIdx oi, const void *tb_ptr)
23
{
24
MemOp mop = get_memop(oi);
25
@@ -XXX,XX +XXX,XX @@ static uint64_t tci_qemu_ld(CPUArchState *env, target_ulong taddr,
18
}
26
}
19
}
27
}
20
28
21
-static void init_arg_info(OptContext *ctx, TCGArg arg)
29
-static void tci_qemu_st(CPUArchState *env, target_ulong taddr, uint64_t val,
22
-{
30
+static void tci_qemu_st(CPUArchState *env, uint64_t taddr, uint64_t val,
23
- init_ts_info(ctx, arg_temp(arg));
31
MemOpIdx oi, const void *tb_ptr)
24
-}
25
-
26
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
27
{
32
{
28
TCGTemp *i, *g, *l;
33
MemOp mop = get_memop(oi);
29
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
34
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
35
TCGReg r0, r1, r2, r3, r4, r5;
36
tcg_target_ulong t1;
37
TCGCond condition;
38
- target_ulong taddr;
39
uint8_t pos, len;
40
uint32_t tmp32;
41
- uint64_t tmp64;
42
+ uint64_t tmp64, taddr;
43
uint64_t T1, T2;
44
MemOpIdx oi;
45
int32_t ofs;
46
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
47
break;
48
49
case INDEX_op_qemu_ld_a32_i32:
50
+ tci_args_rrm(insn, &r0, &r1, &oi);
51
+ taddr = (uint32_t)regs[r1];
52
+ goto do_ld_i32;
53
case INDEX_op_qemu_ld_a64_i32:
54
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
55
+ if (TCG_TARGET_REG_BITS == 64) {
56
tci_args_rrm(insn, &r0, &r1, &oi);
57
taddr = regs[r1];
58
} else {
59
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
60
taddr = tci_uint64(regs[r2], regs[r1]);
61
}
62
- tmp32 = tci_qemu_ld(env, taddr, oi, tb_ptr);
63
- regs[r0] = tmp32;
64
+ do_ld_i32:
65
+ regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr);
66
break;
67
68
case INDEX_op_qemu_ld_a32_i64:
69
+ if (TCG_TARGET_REG_BITS == 64) {
70
+ tci_args_rrm(insn, &r0, &r1, &oi);
71
+ taddr = (uint32_t)regs[r1];
72
+ } else {
73
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
74
+ taddr = (uint32_t)regs[r2];
75
+ }
76
+ goto do_ld_i64;
77
case INDEX_op_qemu_ld_a64_i64:
78
if (TCG_TARGET_REG_BITS == 64) {
79
tci_args_rrm(insn, &r0, &r1, &oi);
80
taddr = regs[r1];
81
- } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
82
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
83
- taddr = regs[r2];
84
} else {
85
tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
86
taddr = tci_uint64(regs[r3], regs[r2]);
87
oi = regs[r4];
88
}
89
+ do_ld_i64:
90
tmp64 = tci_qemu_ld(env, taddr, oi, tb_ptr);
91
if (TCG_TARGET_REG_BITS == 32) {
92
tci_write_reg64(regs, r1, r0, tmp64);
93
@@ -XXX,XX +XXX,XX @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
94
break;
95
96
case INDEX_op_qemu_st_a32_i32:
97
+ tci_args_rrm(insn, &r0, &r1, &oi);
98
+ taddr = (uint32_t)regs[r1];
99
+ goto do_st_i32;
100
case INDEX_op_qemu_st_a64_i32:
101
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
102
+ if (TCG_TARGET_REG_BITS == 64) {
103
tci_args_rrm(insn, &r0, &r1, &oi);
104
taddr = regs[r1];
105
} else {
106
tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
107
taddr = tci_uint64(regs[r2], regs[r1]);
108
}
109
- tmp32 = regs[r0];
110
- tci_qemu_st(env, taddr, tmp32, oi, tb_ptr);
111
+ do_st_i32:
112
+ tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr);
113
break;
114
115
case INDEX_op_qemu_st_a32_i64:
116
+ if (TCG_TARGET_REG_BITS == 64) {
117
+ tci_args_rrm(insn, &r0, &r1, &oi);
118
+ tmp64 = regs[r0];
119
+ taddr = (uint32_t)regs[r1];
120
+ } else {
121
+ tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
122
+ tmp64 = tci_uint64(regs[r1], regs[r0]);
123
+ taddr = (uint32_t)regs[r2];
124
+ }
125
+ goto do_st_i64;
126
case INDEX_op_qemu_st_a64_i64:
127
if (TCG_TARGET_REG_BITS == 64) {
128
tci_args_rrm(insn, &r0, &r1, &oi);
129
- taddr = regs[r1];
130
tmp64 = regs[r0];
131
+ taddr = regs[r1];
132
} else {
133
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
134
- tci_args_rrrm(insn, &r0, &r1, &r2, &oi);
135
- taddr = regs[r2];
136
- } else {
137
- tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
138
- taddr = tci_uint64(regs[r3], regs[r2]);
139
- oi = regs[r4];
140
- }
141
+ tci_args_rrrrr(insn, &r0, &r1, &r2, &r3, &r4);
142
tmp64 = tci_uint64(regs[r1], regs[r0]);
143
+ taddr = tci_uint64(regs[r3], regs[r2]);
144
+ oi = regs[r4];
145
}
146
+ do_st_i64:
147
tci_qemu_st(env, taddr, tmp64, oi, tb_ptr);
148
break;
149
150
diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
151
index XXXXXXX..XXXXXXX 100644
152
--- a/tcg/tci/tcg-target.c.inc
153
+++ b/tcg/tci/tcg-target.c.inc
154
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
30
return false;
155
return false;
31
}
156
}
32
157
33
+static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
158
-static void stack_bounds_check(TCGReg base, target_long offset)
34
+{
159
+static void stack_bounds_check(TCGReg base, intptr_t offset)
35
+ for (int i = 0; i < nb_args; i++) {
36
+ TCGTemp *ts = arg_temp(op->args[i]);
37
+ if (ts) {
38
+ init_ts_info(ctx, ts);
39
+ }
40
+ }
41
+}
42
+
43
/* Propagate constants and copies, fold constant expressions. */
44
void tcg_optimize(TCGContext *s)
45
{
160
{
46
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
161
if (base == TCG_REG_CALL_STACK) {
47
if (opc == INDEX_op_call) {
162
tcg_debug_assert(offset >= 0);
48
nb_oargs = TCGOP_CALLO(op);
163
@@ -XXX,XX +XXX,XX @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
49
nb_iargs = TCGOP_CALLI(op);
164
break;
50
- for (i = 0; i < nb_oargs + nb_iargs; i++) {
165
51
- TCGTemp *ts = arg_temp(op->args[i]);
166
case INDEX_op_qemu_ld_a32_i32:
52
- if (ts) {
167
- case INDEX_op_qemu_ld_a64_i32:
53
- init_ts_info(&ctx, ts);
168
case INDEX_op_qemu_st_a32_i32:
54
- }
169
+ tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
55
- }
170
+ break;
171
+ case INDEX_op_qemu_ld_a64_i32:
172
case INDEX_op_qemu_st_a64_i32:
173
- if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
174
+ case INDEX_op_qemu_ld_a32_i64:
175
+ case INDEX_op_qemu_st_a32_i64:
176
+ if (TCG_TARGET_REG_BITS == 64) {
177
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
56
} else {
178
} else {
57
nb_oargs = def->nb_oargs;
179
tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
58
nb_iargs = def->nb_iargs;
59
- for (i = 0; i < nb_oargs + nb_iargs; i++) {
60
- init_arg_info(&ctx, op->args[i]);
61
- }
62
}
180
}
63
+ init_arguments(&ctx, op, nb_oargs + nb_iargs);
181
break;
64
182
-
65
/* Do copy propagation */
183
- case INDEX_op_qemu_ld_a32_i64:
66
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
184
case INDEX_op_qemu_ld_a64_i64:
185
- case INDEX_op_qemu_st_a32_i64:
186
case INDEX_op_qemu_st_a64_i64:
187
if (TCG_TARGET_REG_BITS == 64) {
188
tcg_out_op_rrm(s, opc, args[0], args[1], args[2]);
189
- } else if (TARGET_LONG_BITS <= TCG_TARGET_REG_BITS) {
190
- tcg_out_op_rrrm(s, opc, args[0], args[1], args[2], args[3]);
191
} else {
192
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_TMP, args[4]);
193
tcg_out_op_rrrrr(s, opc, args[0], args[1],
67
--
194
--
68
2.25.1
195
2.34.1
69
196
70
197
1
Recognize the constant function for or-complement.
1
Keep all 32-bit values zero extended in the register, not solely when
2
addresses are 32 bits. This eliminates a dependency on TARGET_LONG_BITS.
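As a quick aside on the or-complement folding mentioned above (an illustrative check, not part of either patch): the constant function being recognized is orc(x, x) = x | ~x, which is all-ones for any x.

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* or-complement of a value with itself is always all-ones,
           which is what folding to the constant -1 relies on. */
        uint64_t samples[] = { 0, 1, 0x1234, UINT64_MAX };
        for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
            uint64_t x = samples[i];
            assert((x | ~x) == UINT64_MAX);
        }
        return 0;
    }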
2
3
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 1 +
7
tcg/i386/tcg-target.h | 6 +++---
9
1 file changed, 1 insertion(+)
8
1 file changed, 3 insertions(+), 3 deletions(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/tcg/i386/tcg-target.h
14
+++ b/tcg/optimize.c
13
+++ b/tcg/i386/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ extern bool have_atomic16;
16
static bool fold_orc(OptContext *ctx, TCGOp *op)
15
#define TCG_TARGET_HAS_mulsh_i32 0
17
{
16
18
if (fold_const2(ctx, op) ||
17
#if TCG_TARGET_REG_BITS == 64
19
+ fold_xx_to_i(ctx, op, -1) ||
18
-/* Keep target addresses zero-extended in a register. */
20
fold_xi_to_x(ctx, op, -1) ||
19
-#define TCG_TARGET_HAS_extrl_i64_i32 (TARGET_LONG_BITS == 32)
21
fold_ix_to_not(ctx, op, 0)) {
20
-#define TCG_TARGET_HAS_extrh_i64_i32 (TARGET_LONG_BITS == 32)
22
return true;
21
+/* Keep 32-bit values zero-extended in a register. */
22
+#define TCG_TARGET_HAS_extrl_i64_i32 1
23
+#define TCG_TARGET_HAS_extrh_i64_i32 1
24
#define TCG_TARGET_HAS_div2_i64 1
25
#define TCG_TARGET_HAS_rot_i64 1
26
#define TCG_TARGET_HAS_ext8s_i64 1
23
--
27
--
24
2.25.1
28
2.34.1
25
29
26
30
1
Reduce some code duplication by folding the NE and EQ cases.
1
Since TCG_TYPE_I32 values are kept zero-extended in registers, via
2
omission of the REXW bit, we need not extend if the register matches.
3
This is already relied upon by qemu_{ld,st}.
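A toy model of that invariant (made-up register file and helpers, not QEMU code): if every 32-bit producer stores its result zero-extended, the way a 32-bit register write without REX.W behaves on x86-64, then extending a register into itself has nothing left to do.

    #include <assert.h>
    #include <stdint.h>

    static uint64_t regs[4];            /* made-up register file */

    static void write_i32(int r, uint32_t val)
    {
        regs[r] = val;                  /* high 32 bits cleared, as on x86-64 */
    }

    static void extu_i32_i64(int dest, int src)
    {
        if (dest != src) {
            regs[dest] = (uint32_t)regs[src];
        }
        /* dest == src: value is already zero-extended, emit nothing */
    }

    int main(void)
    {
        write_i32(0, 0xffffffffu);
        extu_i32_i64(0, 0);
        assert(regs[0] == 0xffffffffull);
        extu_i32_i64(1, 0);
        assert(regs[1] == 0xffffffffull);
        return 0;
    }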
2
4
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
8
---
7
tcg/optimize.c | 145 ++++++++++++++++++++++++-------------------------
9
tcg/i386/tcg-target.c.inc | 4 +++-
8
1 file changed, 72 insertions(+), 73 deletions(-)
10
1 file changed, 3 insertions(+), 1 deletion(-)
9
11
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
14
--- a/tcg/i386/tcg-target.c.inc
13
+++ b/tcg/optimize.c
15
+++ b/tcg/i386/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
15
return fold_const2(ctx, op);
17
18
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
19
{
20
- tcg_out_ext32u(s, dest, src);
21
+ if (dest != src) {
22
+ tcg_out_ext32u(s, dest, src);
23
+ }
16
}
24
}
17
25
18
+static bool fold_setcond2(OptContext *ctx, TCGOp *op)
26
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src)
19
+{
20
+ TCGCond cond = op->args[5];
21
+ int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
22
+ int inv = 0;
23
+
24
+ if (i >= 0) {
25
+ goto do_setcond_const;
26
+ }
27
+
28
+ switch (cond) {
29
+ case TCG_COND_LT:
30
+ case TCG_COND_GE:
31
+ /*
32
+ * Simplify LT/GE comparisons vs zero to a single compare
33
+ * vs the high word of the input.
34
+ */
35
+ if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
36
+ arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
37
+ goto do_setcond_high;
38
+ }
39
+ break;
40
+
41
+ case TCG_COND_NE:
42
+ inv = 1;
43
+ QEMU_FALLTHROUGH;
44
+ case TCG_COND_EQ:
45
+ /*
46
+ * Simplify EQ/NE comparisons where one of the pairs
47
+ * can be simplified.
48
+ */
49
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
50
+ op->args[3], cond);
51
+ switch (i ^ inv) {
52
+ case 0:
53
+ goto do_setcond_const;
54
+ case 1:
55
+ goto do_setcond_high;
56
+ }
57
+
58
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
59
+ op->args[4], cond);
60
+ switch (i ^ inv) {
61
+ case 0:
62
+ goto do_setcond_const;
63
+ case 1:
64
+ op->args[2] = op->args[3];
65
+ op->args[3] = cond;
66
+ op->opc = INDEX_op_setcond_i32;
67
+ break;
68
+ }
69
+ break;
70
+
71
+ default:
72
+ break;
73
+
74
+ do_setcond_high:
75
+ op->args[1] = op->args[2];
76
+ op->args[2] = op->args[4];
77
+ op->args[3] = cond;
78
+ op->opc = INDEX_op_setcond_i32;
79
+ break;
80
+ }
81
+ return false;
82
+
83
+ do_setcond_const:
84
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
85
+}
86
+
87
static bool fold_shift(OptContext *ctx, TCGOp *op)
88
{
89
return fold_const2(ctx, op);
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
91
}
92
break;
93
94
- case INDEX_op_setcond2_i32:
95
- i = do_constant_folding_cond2(&op->args[1], &op->args[3],
96
- op->args[5]);
97
- if (i >= 0) {
98
- do_setcond_const:
99
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
100
- continue;
101
- }
102
- if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
103
- && arg_is_const(op->args[3])
104
- && arg_info(op->args[3])->val == 0
105
- && arg_is_const(op->args[4])
106
- && arg_info(op->args[4])->val == 0) {
107
- /* Simplify LT/GE comparisons vs zero to a single compare
108
- vs the high word of the input. */
109
- do_setcond_high:
110
- reset_temp(op->args[0]);
111
- arg_info(op->args[0])->z_mask = 1;
112
- op->opc = INDEX_op_setcond_i32;
113
- op->args[1] = op->args[2];
114
- op->args[2] = op->args[4];
115
- op->args[3] = op->args[5];
116
- break;
117
- }
118
- if (op->args[5] == TCG_COND_EQ) {
119
- /* Simplify EQ comparisons where one of the pairs
120
- can be simplified. */
121
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
122
- op->args[1], op->args[3],
123
- TCG_COND_EQ);
124
- if (i == 0) {
125
- goto do_setcond_const;
126
- } else if (i > 0) {
127
- goto do_setcond_high;
128
- }
129
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
130
- op->args[2], op->args[4],
131
- TCG_COND_EQ);
132
- if (i == 0) {
133
- goto do_setcond_high;
134
- } else if (i < 0) {
135
- break;
136
- }
137
- do_setcond_low:
138
- reset_temp(op->args[0]);
139
- arg_info(op->args[0])->z_mask = 1;
140
- op->opc = INDEX_op_setcond_i32;
141
- op->args[2] = op->args[3];
142
- op->args[3] = op->args[5];
143
- break;
144
- }
145
- if (op->args[5] == TCG_COND_NE) {
146
- /* Simplify NE comparisons where one of the pairs
147
- can be simplified. */
148
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
149
- op->args[1], op->args[3],
150
- TCG_COND_NE);
151
- if (i == 0) {
152
- goto do_setcond_high;
153
- } else if (i > 0) {
154
- goto do_setcond_const;
155
- }
156
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
157
- op->args[2], op->args[4],
158
- TCG_COND_NE);
159
- if (i == 0) {
160
- goto do_setcond_low;
161
- } else if (i > 0) {
162
- goto do_setcond_const;
163
- }
164
- }
165
- break;
166
-
167
default:
168
break;
169
170
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
171
CASE_OP_32_64(shr):
172
done = fold_shift(&ctx, op);
173
break;
174
+ case INDEX_op_setcond2_i32:
175
+ done = fold_setcond2(&ctx, op);
176
+ break;
177
CASE_OP_32_64_VEC(sub):
178
done = fold_sub(&ctx, op);
179
break;
180
--
27
--
181
2.25.1
28
2.34.1
182
29
183
30
1
Rather than try to keep these up-to-date across folding,
1
Because of its use on tgen_arithi, this value must be a signed
2
re-read nb_oargs at the end, after re-reading the opcode.
2
32-bit quantity, as that is what may be encoded in the insn.
3
The truncation of the value to unsigned for 32-bit guests is
4
done via the REX bit via 'trexw'.
3
5
4
A couple of asserts need dropping, but that will take care
6
Removes the only uses of target_ulong from this tcg backend.
5
of itself as we split the function further.
6
7
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
10
---
11
tcg/optimize.c | 14 ++++----------
11
tcg/i386/tcg-target.c.inc | 4 ++--
12
1 file changed, 4 insertions(+), 10 deletions(-)
12
1 file changed, 2 insertions(+), 2 deletions(-)
13
13
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
16
--- a/tcg/i386/tcg-target.c.inc
17
+++ b/tcg/optimize.c
17
+++ b/tcg/i386/tcg-target.c.inc
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
18
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
19
19
int trexw = 0, hrexw = 0, tlbrexw = 0;
20
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
20
unsigned mem_index = get_mmuidx(oi);
21
uint64_t z_mask, partmask, affected, tmp;
21
unsigned s_mask = (1 << s_bits) - 1;
22
- int nb_oargs, nb_iargs;
22
- target_ulong tlb_mask;
23
TCGOpcode opc = op->opc;
23
+ int tlb_mask;
24
const TCGOpDef *def;
24
25
25
ldst = new_ldst_label(s);
26
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
26
ldst->is_ld = is_ld;
27
}
27
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
28
28
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
29
def = &tcg_op_defs[opc];
29
addrlo, s_mask - a_mask);
30
- nb_oargs = def->nb_oargs;
30
}
31
- nb_iargs = def->nb_iargs;
31
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
32
- init_arguments(&ctx, op, nb_oargs + nb_iargs);
32
+ tlb_mask = TARGET_PAGE_MASK | a_mask;
33
- copy_propagate(&ctx, op, nb_oargs, nb_iargs);
33
tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
34
+ init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
34
35
+ copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
35
/* cmp 0(TCG_REG_L0), TCG_REG_L1 */
36
37
/* For commutative operations make constant second argument */
38
switch (opc) {
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
40
41
CASE_OP_32_64(qemu_ld):
42
{
43
- MemOpIdx oi = op->args[nb_oargs + nb_iargs];
44
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
45
MemOp mop = get_memop(oi);
46
if (!(mop & MO_SIGN)) {
47
z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
}
50
51
if (partmask == 0) {
52
- tcg_debug_assert(nb_oargs == 1);
53
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
54
continue;
55
}
56
if (affected == 0) {
57
- tcg_debug_assert(nb_oargs == 1);
58
tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
59
continue;
60
}
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
} else if (args_are_copies(op->args[1], op->args[2])) {
63
op->opc = INDEX_op_dup_vec;
64
TCGOP_VECE(op) = MO_32;
65
- nb_iargs = 1;
66
}
67
break;
68
69
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
70
op->opc = opc = (opc == INDEX_op_movcond_i32
71
? INDEX_op_setcond_i32
72
: INDEX_op_setcond_i64);
73
- nb_iargs = 2;
74
}
75
break;
76
77
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
78
if (def->flags & TCG_OPF_BB_END) {
79
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
80
} else {
81
+ int nb_oargs = def->nb_oargs;
82
for (i = 0; i < nb_oargs; i++) {
83
reset_temp(op->args[i]);
84
/* Save the corresponding known-zero bits mask for the
85
--
36
--
86
2.25.1
37
2.34.1
87
38
88
39
1
The results are generally 6 bit unsigned values, though
1
All uses can be inferred from the INDEX_op_qemu_*_a{32,64}_* opcode
2
the count leading and trailing bits may produce any value
2
being used. Add a field into TCGLabelQemuLdst to record the usage.
3
for a zero input.
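For illustration (a self-contained toy, not the real opcode enum from tcg/tcg-opc.h): the address width is fully determined by which _a32_/_a64_ opcode variant was generated, which is why recording it once in the slow-path descriptor is enough.

    #include <assert.h>
    #include <stdbool.h>

    /* Toy stand-ins for the INDEX_op_qemu_*_a{32,64}_* opcodes. */
    typedef enum {
        OP_QEMU_LD_A32_I64,
        OP_QEMU_LD_A64_I64,
        OP_QEMU_ST_A32_I64,
        OP_QEMU_ST_A64_I64,
    } ToyOpcode;

    /* The address width is recoverable from the opcode alone. */
    static bool op_addr_is_64(ToyOpcode op)
    {
        return op == OP_QEMU_LD_A64_I64 || op == OP_QEMU_ST_A64_I64;
    }

    int main(void)
    {
        assert(!op_addr_is_64(OP_QEMU_LD_A32_I64));
        assert(op_addr_is_64(OP_QEMU_ST_A64_I64));
        return 0;
    }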
4
3
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
tcg/optimize.c | 3 ++-
7
tcg/i386/tcg-target.c.inc | 8 +++-----
10
1 file changed, 2 insertions(+), 1 deletion(-)
8
1 file changed, 3 insertions(+), 5 deletions(-)
11
9
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
12
--- a/tcg/i386/tcg-target.c.inc
15
+++ b/tcg/optimize.c
13
+++ b/tcg/i386/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
17
g_assert_not_reached();
15
ldst->addrhi_reg = addrhi;
18
}
16
19
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
17
if (TCG_TARGET_REG_BITS == 64) {
20
-
18
- if (TARGET_LONG_BITS == 64) {
21
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
19
- ttype = TCG_TYPE_I64;
22
return false;
20
- trexw = P_REXW;
23
}
21
- }
24
22
+ ttype = s->addr_type;
25
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
23
+ trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
26
default:
24
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
27
g_assert_not_reached();
25
hrexw = P_REXW;
28
}
26
if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
29
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
27
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
30
return false;
28
ldst->label_ptr[0] = s->code_ptr;
31
}
29
s->code_ptr += 4;
30
31
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
32
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I64) {
33
/* cmp 4(TCG_REG_L0), addrhi */
34
tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
32
35
33
--
36
--
34
2.25.1
37
2.34.1
35
38
36
39
1
This will expose the variable to subroutines that
1
All uses can be inferred from the INDEX_op_qemu_*_a{32,64}_*
2
will be broken out of tcg_optimize.
2
opcode being used.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
tcg/optimize.c | 11 ++++++-----
7
tcg/arm/tcg-target.c.inc | 14 +++++++-------
10
1 file changed, 6 insertions(+), 5 deletions(-)
8
1 file changed, 7 insertions(+), 7 deletions(-)
11
9
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
12
--- a/tcg/arm/tcg-target.c.inc
15
+++ b/tcg/optimize.c
13
+++ b/tcg/arm/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
14
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
17
15
* Load the tlb comparator into R2/R3 and the fast path addend into R1.
18
typedef struct OptContext {
16
*/
19
TCGContext *tcg;
17
if (cmp_off == 0) {
20
+ TCGOp *prev_mb;
18
- if (TARGET_LONG_BITS == 64) {
21
TCGTempSet temps_used;
19
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
22
} OptContext;
20
- } else {
23
21
+ if (s->addr_type == TCG_TYPE_I32) {
24
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
22
tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
25
void tcg_optimize(TCGContext *s)
23
+ } else {
26
{
24
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
27
int nb_temps, nb_globals, i;
28
- TCGOp *op, *op_next, *prev_mb = NULL;
29
+ TCGOp *op, *op_next;
30
OptContext ctx = { .tcg = s };
31
32
/* Array VALS has an element for each temp.
33
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
34
}
25
}
35
26
} else {
36
/* Eliminate duplicate and redundant fence instructions. */
27
tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
37
- if (prev_mb) {
28
TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
38
+ if (ctx.prev_mb) {
29
- if (TARGET_LONG_BITS == 64) {
39
switch (opc) {
30
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
40
case INDEX_op_mb:
31
- } else {
41
/* Merge two barriers of the same type into one,
32
+ if (s->addr_type == TCG_TYPE_I32) {
42
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
33
tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
43
* barrier. This is stricter than specified but for
34
+ } else {
44
* the purposes of TCG is better than not optimizing.
35
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
45
*/
46
- prev_mb->args[0] |= op->args[0];
47
+ ctx.prev_mb->args[0] |= op->args[0];
48
tcg_op_remove(s, op);
49
break;
50
51
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
52
case INDEX_op_qemu_st_i64:
53
case INDEX_op_call:
54
/* Opcodes that touch guest memory stop the optimization. */
55
- prev_mb = NULL;
56
+ ctx.prev_mb = NULL;
57
break;
58
}
59
} else if (opc == INDEX_op_mb) {
60
- prev_mb = op;
61
+ ctx.prev_mb = op;
62
}
36
}
63
}
37
}
64
}
38
39
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
40
SHIFT_IMM_LSL(TARGET_PAGE_BITS));
41
}
42
43
- if (TARGET_LONG_BITS == 64) {
44
+ if (s->addr_type != TCG_TYPE_I32) {
45
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
46
}
47
#else
65
--
48
--
66
2.25.1
49
2.34.1
67
50
68
51
1
For constant shifts, we can simply shift the s_mask.
1
Eliminate the test vs TARGET_LONG_BITS by considering this
2
2
predicate to be always true, and simplify accordingly.
3
For variable shifts, we know that sar does not reduce
4
the s_mask, which helps for sequences like
5
6
ext32s_i64 t, in
7
sar_i64 t, t, v
8
ext32s_i64 out, t
9
10
allowing the final extend to be eliminated.
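A standalone check of that property (plain C, not TCG code; it assumes the usual arithmetic behaviour of ">>" on negative values): after an arithmetic right shift of a sign-extended 32-bit value, a further ext32s is a no-op.

    #include <assert.h>
    #include <stdint.h>

    /* Mimic ext32s_i64: sign-extend the low 32 bits to 64. */
    static int64_t ext32s(int64_t v)
    {
        return (int32_t)v;
    }

    int main(void)
    {
        int64_t t = INT32_MIN;              /* already a sign-extended 32-bit value */
        for (int sh = 0; sh < 64; sh++) {
            int64_t shifted = t >> sh;      /* stands in for sar_i64 */
            /* At least the top 33 bits are still copies of the sign bit,
               so a following ext32s_i64 changes nothing. */
            assert(ext32s(shifted) == shifted);
        }
        return 0;
    }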
11
3
12
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
13
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
---
6
---
16
tcg/optimize.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
7
tcg/aarch64/tcg-target.c.inc | 19 +++++++++----------
17
1 file changed, 47 insertions(+), 3 deletions(-)
8
1 file changed, 9 insertions(+), 10 deletions(-)
18
9
19
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
20
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
21
--- a/tcg/optimize.c
12
--- a/tcg/aarch64/tcg-target.c.inc
22
+++ b/tcg/optimize.c
13
+++ b/tcg/aarch64/tcg-target.c.inc
23
@@ -XXX,XX +XXX,XX @@ static uint64_t smask_from_zmask(uint64_t zmask)
14
@@ -XXX,XX +XXX,XX @@ bool have_lse2;
24
return ~(~0ull >> rep);
15
#define TCG_VEC_TMP0 TCG_REG_V31
25
}
16
26
17
#ifndef CONFIG_SOFTMMU
27
+/*
18
-/* Note that XZR cannot be encoded in the address base register slot,
28
+ * Recreate a properly left-aligned smask after manipulation.
19
- as that actaully encodes SP. So if we need to zero-extend the guest
29
+ * Some bit-shuffling, particularly shifts and rotates, may
20
- address, via the address index register slot, we need to load even
30
+ * retain sign bits on the left, but may scatter disconnected
21
- a zero guest base into a register. */
31
+ * sign bits on the right. Retain only what remains to the left.
22
-#define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
32
+ */
23
#define TCG_REG_GUEST_BASE TCG_REG_X28
33
+static uint64_t smask_from_smask(int64_t smask)
24
#endif
34
+{
25
35
+ /* Only the 1 bits are significant for smask */
26
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
36
+ return smask_from_zmask(~smask);
27
tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
37
+}
38
+
39
static inline TempOptInfo *ts_info(TCGTemp *ts)
40
{
41
return ts->state_ptr;
42
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
43
44
static bool fold_shift(OptContext *ctx, TCGOp *op)
45
{
46
+ uint64_t s_mask, z_mask, sign;
47
+
48
if (fold_const2(ctx, op) ||
49
fold_ix_to_i(ctx, op, 0) ||
50
fold_xi_to_x(ctx, op, 0)) {
51
return true;
52
}
28
}
53
29
54
+ s_mask = arg_info(op->args[1])->s_mask;
30
- if (USE_GUEST_BASE) {
55
+ z_mask = arg_info(op->args[1])->z_mask;
31
+ if (guest_base || addr_type == TCG_TYPE_I32) {
56
+
32
h->base = TCG_REG_GUEST_BASE;
57
if (arg_is_const(op->args[2])) {
33
h->index = addr_reg;
58
- ctx->z_mask = do_constant_folding(op->opc, ctx->type,
34
h->index_ext = addr_type;
59
- arg_info(op->args[1])->z_mask,
35
@@ -XXX,XX +XXX,XX @@ static void tcg_target_qemu_prologue(TCGContext *s)
60
- arg_info(op->args[2])->val);
36
CPU_TEMP_BUF_NLONGS * sizeof(long));
61
+ int sh = arg_info(op->args[2])->val;
37
62
+
38
#if !defined(CONFIG_SOFTMMU)
63
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
39
- if (USE_GUEST_BASE) {
64
+
40
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
65
+ s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
41
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
66
+ ctx->s_mask = smask_from_smask(s_mask);
42
- }
67
+
43
+ /*
68
return fold_masks(ctx, op);
44
+ * Note that XZR cannot be encoded in the address base register slot,
69
}
45
+ * as that actaully encodes SP. Depending on the guest, we may need
70
+
46
+ * to zero-extend the guest address via the address index register slot,
71
+ switch (op->opc) {
47
+ * therefore we need to load even a zero guest base into a register.
72
+ CASE_OP_32_64(sar):
48
+ */
73
+ /*
49
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
74
+ * Arithmetic right shift will not reduce the number of
50
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
75
+ * input sign repetitions.
51
#endif
76
+ */
52
77
+ ctx->s_mask = s_mask;
53
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
78
+ break;
79
+ CASE_OP_32_64(shr):
80
+ /*
81
+ * If the sign bit is known zero, then logical right shift
82
+ * will not reduced the number of input sign repetitions.
83
+ */
84
+ sign = (s_mask & -s_mask) >> 1;
85
+ if (!(z_mask & sign)) {
86
+ ctx->s_mask = s_mask;
87
+ }
88
+ break;
89
+ default:
90
+ break;
91
+ }
92
+
93
return false;
94
}
95
96
--
54
--
97
2.25.1
55
2.34.1
98
56
99
57
1
Break the final cleanup clause out of the main switch
1
All uses replaced with TCGContext.addr_type.
2
statement. When fully folding an opcode to mov/movi,
3
use "continue" to process the next opcode, else break
4
to fall into the final cleanup.
5
2
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
5
---
11
tcg/optimize.c | 190 ++++++++++++++++++++++++-------------------------
6
tcg/aarch64/tcg-target.c.inc | 11 +++++------
12
1 file changed, 94 insertions(+), 96 deletions(-)
7
1 file changed, 5 insertions(+), 6 deletions(-)
13
8
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
15
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
11
--- a/tcg/aarch64/tcg-target.c.inc
17
+++ b/tcg/optimize.c
12
+++ b/tcg/aarch64/tcg-target.c.inc
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
13
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
19
switch (opc) {
14
TCGReg addr_reg, MemOpIdx oi,
20
CASE_OP_32_64_VEC(mov):
15
bool is_ld)
21
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
16
{
22
- break;
17
- TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
23
+ continue;
18
+ TCGType addr_type = s->addr_type;
24
19
TCGLabelQemuLdst *ldst = NULL;
25
case INDEX_op_dup_vec:
20
MemOp opc = get_memop(oi);
26
if (arg_is_const(op->args[1])) {
21
MemOp s_bits = opc & MO_SIZE;
27
tmp = arg_info(op->args[1])->val;
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
28
tmp = dup_const(TCGOP_VECE(op), tmp);
23
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
29
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
24
30
- break;
25
/* Load the tlb comparator into X0, and the fast path addend into X1. */
31
+ continue;
26
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
32
}
27
+ tcg_out_ld(s, addr_type, TCG_REG_X0, TCG_REG_X1,
33
- goto do_default;
28
is_ld ? offsetof(CPUTLBEntry, addr_read)
34
+ break;
29
: offsetof(CPUTLBEntry, addr_write));
35
30
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
36
case INDEX_op_dup2_vec:
31
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
37
assert(TCG_TARGET_REG_BITS == 32);
32
if (a_mask >= s_mask) {
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
33
x3 = addr_reg;
39
tcg_opt_gen_movi(s, &ctx, op, op->args[0],
34
} else {
40
deposit64(arg_info(op->args[1])->val, 32, 32,
35
- tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
41
arg_info(op->args[2])->val));
36
+ tcg_out_insn(s, 3401, ADDI, addr_type,
42
- break;
37
TCG_REG_X3, addr_reg, s_mask - a_mask);
43
+ continue;
38
x3 = TCG_REG_X3;
44
} else if (args_are_copies(op->args[1], op->args[2])) {
39
}
45
op->opc = INDEX_op_dup_vec;
40
compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
46
TCGOP_VECE(op) = MO_32;
41
47
nb_iargs = 1;
42
/* Store the page mask part of the address into X3. */
48
}
43
- tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
49
- goto do_default;
44
- TCG_REG_X3, x3, compare_mask);
50
+ break;
45
+ tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
51
46
52
CASE_OP_32_64(not):
47
/* Perform the address comparison. */
53
CASE_OP_32_64(neg):
48
- tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
+ tcg_out_cmp(s, addr_type, TCG_REG_X0, TCG_REG_X3, 0);
55
if (arg_is_const(op->args[1])) {
50
56
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
51
/* If not equal, we jump to the slow path. */
57
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
52
ldst->label_ptr[0] = s->code_ptr;
58
- break;
59
+ continue;
60
}
61
- goto do_default;
62
+ break;
63
64
CASE_OP_32_64(bswap16):
65
CASE_OP_32_64(bswap32):
66
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
67
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
68
op->args[2]);
69
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
70
- break;
71
+ continue;
72
}
73
- goto do_default;
74
+ break;
75
76
CASE_OP_32_64(add):
77
CASE_OP_32_64(sub):
78
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
79
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
80
arg_info(op->args[2])->val);
81
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
82
- break;
83
+ continue;
84
}
85
- goto do_default;
86
+ break;
87
88
CASE_OP_32_64(clz):
89
CASE_OP_32_64(ctz):
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
91
} else {
92
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
93
}
94
- break;
95
+ continue;
96
}
97
- goto do_default;
98
+ break;
99
100
CASE_OP_32_64(deposit):
101
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
102
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
103
op->args[3], op->args[4],
104
arg_info(op->args[2])->val);
105
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
106
- break;
107
+ continue;
108
}
109
- goto do_default;
110
+ break;
111
112
CASE_OP_32_64(extract):
113
if (arg_is_const(op->args[1])) {
114
tmp = extract64(arg_info(op->args[1])->val,
115
op->args[2], op->args[3]);
116
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
117
- break;
118
+ continue;
119
}
120
- goto do_default;
121
+ break;
122
123
CASE_OP_32_64(sextract):
124
if (arg_is_const(op->args[1])) {
125
tmp = sextract64(arg_info(op->args[1])->val,
126
op->args[2], op->args[3]);
127
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
128
- break;
129
+ continue;
130
}
131
- goto do_default;
132
+ break;
133
134
CASE_OP_32_64(extract2):
135
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
136
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
137
((uint32_t)v2 << (32 - shr)));
138
}
139
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
140
- break;
141
+ continue;
142
}
143
- goto do_default;
144
+ break;
145
146
CASE_OP_32_64(setcond):
147
tmp = do_constant_folding_cond(opc, op->args[1],
148
op->args[2], op->args[3]);
149
if (tmp != 2) {
150
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
151
- break;
152
+ continue;
153
}
154
- goto do_default;
155
+ break;
156
157
CASE_OP_32_64(brcond):
158
tmp = do_constant_folding_cond(opc, op->args[0],
159
op->args[1], op->args[2]);
160
- if (tmp != 2) {
161
- if (tmp) {
162
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
163
- op->opc = INDEX_op_br;
164
- op->args[0] = op->args[3];
165
- } else {
166
- tcg_op_remove(s, op);
167
- }
168
+ switch (tmp) {
169
+ case 0:
170
+ tcg_op_remove(s, op);
171
+ continue;
172
+ case 1:
173
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
174
+ op->opc = opc = INDEX_op_br;
175
+ op->args[0] = op->args[3];
176
break;
177
}
178
- goto do_default;
179
+ break;
180
181
CASE_OP_32_64(movcond):
182
tmp = do_constant_folding_cond(opc, op->args[1],
183
op->args[2], op->args[5]);
184
if (tmp != 2) {
185
tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
186
- break;
187
+ continue;
188
}
189
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
190
uint64_t tv = arg_info(op->args[3])->val;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
if (fv == 1 && tv == 0) {
193
cond = tcg_invert_cond(cond);
194
} else if (!(tv == 1 && fv == 0)) {
195
- goto do_default;
196
+ break;
197
}
198
op->args[3] = cond;
199
op->opc = opc = (opc == INDEX_op_movcond_i32
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
: INDEX_op_setcond_i64);
202
nb_iargs = 2;
203
}
204
- goto do_default;
205
+ break;
206
207
case INDEX_op_add2_i32:
208
case INDEX_op_sub2_i32:
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
rh = op->args[1];
211
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
212
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
213
- break;
214
+ continue;
215
}
216
- goto do_default;
217
+ break;
218
219
case INDEX_op_mulu2_i32:
220
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
221
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
222
rh = op->args[1];
223
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
224
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
225
- break;
226
+ continue;
227
}
228
- goto do_default;
229
+ break;
230
231
case INDEX_op_brcond2_i32:
232
tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
233
op->args[4]);
234
- if (tmp != 2) {
235
- if (tmp) {
236
- do_brcond_true:
237
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
238
- op->opc = INDEX_op_br;
239
- op->args[0] = op->args[5];
240
- } else {
241
+ if (tmp == 0) {
242
do_brcond_false:
243
- tcg_op_remove(s, op);
244
- }
245
- } else if ((op->args[4] == TCG_COND_LT
246
- || op->args[4] == TCG_COND_GE)
247
- && arg_is_const(op->args[2])
248
- && arg_info(op->args[2])->val == 0
249
- && arg_is_const(op->args[3])
250
- && arg_info(op->args[3])->val == 0) {
251
+ tcg_op_remove(s, op);
252
+ continue;
253
+ }
254
+ if (tmp == 1) {
255
+ do_brcond_true:
256
+ op->opc = opc = INDEX_op_br;
257
+ op->args[0] = op->args[5];
258
+ break;
259
+ }
260
+ if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
261
+ && arg_is_const(op->args[2])
262
+ && arg_info(op->args[2])->val == 0
263
+ && arg_is_const(op->args[3])
264
+ && arg_info(op->args[3])->val == 0) {
265
/* Simplify LT/GE comparisons vs zero to a single compare
266
vs the high word of the input. */
267
do_brcond_high:
268
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
269
- op->opc = INDEX_op_brcond_i32;
270
+ op->opc = opc = INDEX_op_brcond_i32;
271
op->args[0] = op->args[1];
272
op->args[1] = op->args[3];
273
op->args[2] = op->args[4];
274
op->args[3] = op->args[5];
275
- } else if (op->args[4] == TCG_COND_EQ) {
276
+ break;
277
+ }
278
+ if (op->args[4] == TCG_COND_EQ) {
279
/* Simplify EQ comparisons where one of the pairs
280
can be simplified. */
281
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
282
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
283
if (tmp == 0) {
284
goto do_brcond_false;
285
} else if (tmp != 1) {
286
- goto do_default;
287
+ break;
288
}
289
do_brcond_low:
290
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
291
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
292
op->args[1] = op->args[2];
293
op->args[2] = op->args[4];
294
op->args[3] = op->args[5];
295
- } else if (op->args[4] == TCG_COND_NE) {
296
+ break;
297
+ }
298
+ if (op->args[4] == TCG_COND_NE) {
299
/* Simplify NE comparisons where one of the pairs
300
can be simplified. */
301
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
302
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
303
} else if (tmp == 1) {
304
goto do_brcond_true;
305
}
306
- goto do_default;
307
- } else {
308
- goto do_default;
309
}
310
break;
311
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
if (tmp != 2) {
314
do_setcond_const:
315
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
316
- } else if ((op->args[5] == TCG_COND_LT
317
- || op->args[5] == TCG_COND_GE)
318
- && arg_is_const(op->args[3])
319
- && arg_info(op->args[3])->val == 0
320
- && arg_is_const(op->args[4])
321
- && arg_info(op->args[4])->val == 0) {
322
+ continue;
323
+ }
324
+ if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
325
+ && arg_is_const(op->args[3])
326
+ && arg_info(op->args[3])->val == 0
327
+ && arg_is_const(op->args[4])
328
+ && arg_info(op->args[4])->val == 0) {
329
/* Simplify LT/GE comparisons vs zero to a single compare
330
vs the high word of the input. */
331
do_setcond_high:
332
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
333
op->args[1] = op->args[2];
334
op->args[2] = op->args[4];
335
op->args[3] = op->args[5];
336
- } else if (op->args[5] == TCG_COND_EQ) {
337
+ break;
338
+ }
339
+ if (op->args[5] == TCG_COND_EQ) {
340
/* Simplify EQ comparisons where one of the pairs
341
can be simplified. */
342
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
343
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
344
if (tmp == 0) {
345
goto do_setcond_high;
346
} else if (tmp != 1) {
347
- goto do_default;
348
+ break;
349
}
350
do_setcond_low:
351
reset_temp(op->args[0]);
352
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
353
op->opc = INDEX_op_setcond_i32;
354
op->args[2] = op->args[3];
355
op->args[3] = op->args[5];
356
- } else if (op->args[5] == TCG_COND_NE) {
357
+ break;
358
+ }
359
+ if (op->args[5] == TCG_COND_NE) {
360
/* Simplify NE comparisons where one of the pairs
361
can be simplified. */
362
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
363
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
364
} else if (tmp == 1) {
365
goto do_setcond_const;
366
}
367
- goto do_default;
368
- } else {
369
- goto do_default;
370
}
371
break;
372
373
- case INDEX_op_call:
374
- if (!(tcg_call_flags(op)
375
+ default:
376
+ break;
377
+ }
378
+
379
+ /* Some of the folding above can change opc. */
380
+ opc = op->opc;
381
+ def = &tcg_op_defs[opc];
382
+ if (def->flags & TCG_OPF_BB_END) {
383
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
384
+ } else {
385
+ if (opc == INDEX_op_call &&
386
+ !(tcg_call_flags(op)
387
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
388
for (i = 0; i < nb_globals; i++) {
389
if (test_bit(i, ctx.temps_used.l)) {
390
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
391
}
392
}
393
}
394
- goto do_reset_output;
395
396
- default:
397
- do_default:
398
- /* Default case: we know nothing about operation (or were unable
399
- to compute the operation result) so no propagation is done.
400
- We trash everything if the operation is the end of a basic
401
- block, otherwise we only trash the output args. "z_mask" is
402
- the non-zero bits mask for the first output arg. */
403
- if (def->flags & TCG_OPF_BB_END) {
404
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
405
- } else {
406
- do_reset_output:
407
- for (i = 0; i < nb_oargs; i++) {
408
- reset_temp(op->args[i]);
409
- /* Save the corresponding known-zero bits mask for the
410
- first output argument (only one supported so far). */
411
- if (i == 0) {
412
- arg_info(op->args[i])->z_mask = z_mask;
413
- }
414
+ for (i = 0; i < nb_oargs; i++) {
415
+ reset_temp(op->args[i]);
416
+ /* Save the corresponding known-zero bits mask for the
417
+ first output argument (only one supported so far). */
418
+ if (i == 0) {
419
+ arg_info(op->args[i])->z_mask = z_mask;
420
}
421
}
422
- break;
423
}
424
425
/* Eliminate duplicate and redundant fence instructions. */
--
2.25.1

--
2.34.1

This will allow callers to tail call to these functions
and return true indicating processing complete.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)

All uses replaced with TCGContext.addr_type.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/loongarch64/tcg-target.c.inc | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)

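The tcg/optimize change above boils down to letting a fold routine end with a single tail call. A minimal, self-contained sketch of that pattern (toy code, not the QEMU helpers themselves; the names gen_mov and try_fold are made up for illustration):

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-ins for the optimizer helpers; names are illustrative only. */
static bool gen_mov(int *dst, int src)
{
    *dst = src;
    return true;                    /* rewrite done, nothing left to do */
}

static bool try_fold(int *dst, int src, bool known_const)
{
    if (known_const) {
        return gen_mov(dst, src);   /* tail call; "handled" propagates up */
    }
    return false;                   /* caller continues with the generic path */
}

int main(void)
{
    int dst = 0;
    bool handled = try_fold(&dst, 42, true);
    printf("handled=%d dst=%d\n", handled, dst);
    return 0;
}
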
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
11
--- a/tcg/loongarch64/tcg-target.c.inc
15
+++ b/tcg/optimize.c
12
+++ b/tcg/loongarch64/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
13
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
17
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
14
TCGReg addr_reg, MemOpIdx oi,
18
}
15
bool is_ld)
19
20
-static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
21
+static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
22
{
16
{
23
TCGTemp *dst_ts = arg_temp(dst);
17
+ TCGType addr_type = s->addr_type;
24
TCGTemp *src_ts = arg_temp(src);
18
TCGLabelQemuLdst *ldst = NULL;
25
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
19
MemOp opc = get_memop(oi);
26
20
MemOp a_bits;
27
if (ts_are_copies(dst_ts, src_ts)) {
21
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
28
tcg_op_remove(ctx->tcg, op);
22
tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
29
- return;
23
30
+ return true;
24
/* Load the tlb comparator and the addend. */
25
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
26
+ tcg_out_ld(s, addr_type, TCG_REG_TMP0, TCG_REG_TMP2,
27
is_ld ? offsetof(CPUTLBEntry, addr_read)
28
: offsetof(CPUTLBEntry, addr_write));
29
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
30
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
31
if (a_bits < s_bits) {
32
unsigned a_mask = (1u << a_bits) - 1;
33
unsigned s_mask = (1u << s_bits) - 1;
34
- tcg_out_addi(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
35
+ tcg_out_addi(s, addr_type, TCG_REG_TMP1, addr_reg, s_mask - a_mask);
36
} else {
37
- tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_TMP1, addr_reg);
38
+ tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
31
}
39
}
32
40
tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
33
reset_ts(dst_ts);
41
a_bits, TARGET_PAGE_BITS - 1);
34
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
42
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
35
di->is_const = si->is_const;
43
h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
36
di->val = si->val;
44
#endif
37
}
45
38
+ return true;
46
- if (TARGET_LONG_BITS == 32) {
39
}
47
+ if (addr_type == TCG_TYPE_I32) {
40
48
h->base = TCG_REG_TMP0;
41
-static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
49
tcg_out_ext32u(s, h->base, addr_reg);
42
+static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
50
} else {
43
TCGArg dst, uint64_t val)
44
{
45
const TCGOpDef *def = &tcg_op_defs[op->opc];
46
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
47
/* Convert movi to mov with constant temp. */
48
tv = tcg_constant_internal(type, val);
49
init_ts_info(ctx, tv);
50
- tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
51
+ return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
52
}
53
54
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
55
--
51
--
56
2.25.1
52
2.34.1
57
53
58
54
This "garbage" setting pre-dates the addition of the type
changing opcodes INDEX_op_ext_i32_i64, INDEX_op_extu_i32_i64,
and INDEX_op_extr{l,h}_i64_i32.

So now we have a definitive point at which to adjust z_mask
to eliminate such bits from the 32-bit operands.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 35 ++++++++++++++++-------------------
1 file changed, 16 insertions(+), 19 deletions(-)

All uses replaced with TCGContext.addr_type.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/mips/tcg-target.c.inc | 42 +++++++++++++++++++++------------------
1 file changed, 23 insertions(+), 19 deletions(-)

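The tcg/optimize message above relies on 32-bit quantities being kept sign-extended in their 64-bit storage; the patch expresses this with (int32_t) casts on z_mask. A small standalone demonstration of what such a cast does to a mask (example values only, not QEMU code):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* A known-zeros mask for a 32-bit value whose bit 31 may be set. */
    uint64_t z_mask = 0x80000000u;

    /* Casting through int32_t replicates bit 31 into bits 63..32, which is
       how 32-bit quantities are represented when held in 64-bit fields. */
    uint64_t sign_extended = (uint64_t)(int64_t)(int32_t)z_mask;

    printf("%016" PRIx64 " -> %016" PRIx64 "\n", z_mask, sign_extended);
    return 0;
}
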
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
16
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/optimize.c
11
--- a/tcg/mips/tcg-target.c.inc
18
+++ b/tcg/optimize.c
12
+++ b/tcg/mips/tcg-target.c.inc
19
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
13
@@ -XXX,XX +XXX,XX @@ typedef enum {
20
ti->is_const = true;
14
/* Aliases for convenience. */
21
ti->val = ts->val;
15
ALIAS_PADD = sizeof(void *) == 4 ? OPC_ADDU : OPC_DADDU,
22
ti->z_mask = ts->val;
16
ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
23
- if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
17
- ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
24
- /* High bits of a 32-bit quantity are garbage. */
18
- ? OPC_SRL : OPC_DSRL,
25
- ti->z_mask |= ~0xffffffffull;
19
- ALIAS_TADDI = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
26
- }
20
- ? OPC_ADDIU : OPC_DADDIU,
21
} MIPSInsn;
22
23
/*
24
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
25
TCGReg addrlo, TCGReg addrhi,
26
MemOpIdx oi, bool is_ld)
27
{
28
+ TCGType addr_type = s->addr_type;
29
TCGLabelQemuLdst *ldst = NULL;
30
MemOp opc = get_memop(oi);
31
MemOp a_bits;
32
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
33
tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
34
35
/* Extract the TLB index from the address into TMP3. */
36
- tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrlo,
37
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
38
+ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
39
+ tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
40
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
41
+ } else {
42
+ tcg_out_dsrl(s, TCG_TMP3, addrlo,
43
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
44
+ }
45
tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
46
47
/* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
48
tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
49
50
- /* Load the (low-half) tlb comparator. */
51
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
52
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
53
- } else {
54
- tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_TMP3, cmp_off);
55
- }
56
-
57
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
58
+ if (TCG_TARGET_REG_BITS == 64 || addr_type == TCG_TYPE_I32) {
59
+ /* Load the tlb comparator. */
60
+ tcg_out_ld(s, addr_type, TCG_TMP0, TCG_TMP3, cmp_off);
61
/* Load the tlb addend for the fast path. */
62
tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
63
+ } else {
64
+ /* Load the low half of the tlb comparator. */
65
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
66
}
67
68
/*
69
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
70
* For unaligned accesses, compare against the end of the access to
71
* verify that it does not cross a page boundary.
72
*/
73
- tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
74
+ tcg_out_movi(s, addr_type, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
75
if (a_mask < s_mask) {
76
- tcg_out_opc_imm(s, ALIAS_TADDI, TCG_TMP2, addrlo, s_mask - a_mask);
77
+ if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
78
+ tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
79
+ } else {
80
+ tcg_out_opc_imm(s, OPC_DADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
81
+ }
82
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
27
} else {
83
} else {
28
ti->is_const = false;
84
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
29
ti->z_mask = -1;
30
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
31
TCGTemp *src_ts = arg_temp(src);
32
TempOptInfo *di;
33
TempOptInfo *si;
34
- uint64_t z_mask;
35
TCGOpcode new_op;
36
37
if (ts_are_copies(dst_ts, src_ts)) {
38
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
39
op->args[0] = dst;
40
op->args[1] = src;
41
42
- z_mask = si->z_mask;
43
- if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
44
- /* High bits of the destination are now garbage. */
45
- z_mask |= ~0xffffffffull;
46
- }
47
- di->z_mask = z_mask;
48
+ di->z_mask = si->z_mask;
49
50
if (src_ts->type == dst_ts->type) {
51
TempOptInfo *ni = ts_info(si->next_copy);
52
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
53
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
54
TCGArg dst, uint64_t val)
55
{
56
- /* Convert movi to mov with constant temp. */
57
- TCGTemp *tv = tcg_constant_internal(ctx->type, val);
58
+ TCGTemp *tv;
59
60
+ if (ctx->type == TCG_TYPE_I32) {
61
+ val = (int32_t)val;
62
+ }
63
+
64
+ /* Convert movi to mov with constant temp. */
65
+ tv = tcg_constant_internal(ctx->type, val);
66
init_ts_info(ctx, tv);
67
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
68
}
69
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
70
uint64_t z_mask = ctx->z_mask;
71
72
/*
73
- * 32-bit ops generate 32-bit results. For the result is zero test
74
- * below, we can ignore high bits, but for further optimizations we
75
- * need to record that the high bits contain garbage.
76
+ * 32-bit ops generate 32-bit results, which for the purpose of
77
+ * simplifying tcg are sign-extended. Certainly that's how we
78
+ * represent our constants elsewhere. Note that the bits will
79
+ * be reset properly for a 64-bit value when encountering the
80
+ * type changing opcodes.
81
*/
82
if (ctx->type == TCG_TYPE_I32) {
83
- ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
84
- a_mask &= MAKE_64BIT_MASK(0, 32);
85
- z_mask &= MAKE_64BIT_MASK(0, 32);
86
+ a_mask = (int32_t)a_mask;
87
+ z_mask = (int32_t)z_mask;
88
+ ctx->z_mask = z_mask;
89
}
85
}
90
86
91
if (z_mask == 0) {
87
/* Zero extend a 32-bit guest address for a 64-bit host. */
88
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
89
+ if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
90
tcg_out_ext32u(s, TCG_TMP2, addrlo);
91
addrlo = TCG_TMP2;
92
}
93
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
94
tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
95
96
/* Load and test the high half tlb comparator. */
97
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
98
+ if (TCG_TARGET_REG_BITS == 32 && addr_type != TCG_TYPE_I32) {
99
/* delay slot */
100
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
101
102
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
103
}
104
105
base = addrlo;
106
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
107
+ if (TCG_TARGET_REG_BITS == 64 && addr_type == TCG_TYPE_I32) {
108
tcg_out_ext32u(s, TCG_REG_A0, base);
109
base = TCG_REG_A0;
110
}
92
--
111
--
93
2.25.1
112
2.34.1
94
113
95
114
The result is either 0 or 1, which means that we have
a 2-bit signed result, and thus 62 bits of sign.
For clarity, use the smask_from_zmask function.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 2 ++
1 file changed, 2 insertions(+)

All uses replaced with TCGContext.addr_type.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg.c | 27 ++++++++++++++-------------
1 file changed, 14 insertions(+), 13 deletions(-)

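For the arithmetic in the tcg/optimize message above: a setcond result is 0 or 1, so its known-zeros mask is 1 and the top 62 bits must all equal the sign bit. The helper below is only a sketch of that idea, not the actual smask_from_zmask definition, and it assumes a GCC/Clang-style __builtin_clzll:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch only, not the QEMU helper.  z_mask has a bit set for every bit
   that may be non-zero; if only bit 0 may be set, the value fits in two
   signed bits, so the top 62 bits are known copies of the sign bit. */
static uint64_t sketch_smask_from_zmask(uint64_t z_mask)
{
    int rep = z_mask ? __builtin_clzll(z_mask) : 64;  /* leading known-zero bits */
    rep = rep > 0 ? rep - 1 : 0;                      /* one of them is the sign bit */
    return rep ? ~(~0ull >> rep) : 0;
}

int main(void)
{
    /* setcond result: 0 or 1, so z_mask == 1 and 62 bits of sign. */
    printf("%016" PRIx64 "\n", sketch_smask_from_zmask(1));
    return 0;
}
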
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/tcg.c b/tcg/tcg.c
13
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
11
--- a/tcg/tcg.c
15
+++ b/tcg/optimize.c
12
+++ b/tcg/tcg.c
16
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
14
next_arg = 1;
15
16
loc = &info->in[next_arg];
17
- if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
18
- nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
19
- ldst->addrlo_reg, ldst->addrhi_reg);
20
- tcg_out_helper_load_slots(s, nmov, mov, parm);
21
- next_arg += nmov;
22
- } else {
23
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
24
/*
25
* 32-bit host with 32-bit guest: zero-extend the guest address
26
* to 64-bits for the helper by storing the low part, then
27
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
28
tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
29
TCG_TYPE_I32, 0, parm);
30
next_arg += 2;
31
+ } else {
32
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
33
+ ldst->addrlo_reg, ldst->addrhi_reg);
34
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
35
+ next_arg += nmov;
17
}
36
}
18
37
19
ctx->z_mask = 1;
38
switch (info->out_kind) {
20
+ ctx->s_mask = smask_from_zmask(1);
39
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
21
return false;
40
22
}
41
/* Handle addr argument. */
23
42
loc = &info->in[next_arg];
24
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
43
- if (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 64) {
44
- n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, TCG_TYPE_TL,
45
- ldst->addrlo_reg, ldst->addrhi_reg);
46
- next_arg += n;
47
- nmov += n;
48
- } else {
49
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
50
/*
51
* 32-bit host with 32-bit guest: zero-extend the guest address
52
* to 64-bits for the helper by storing the low part. Later,
53
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
54
ldst->addrlo_reg, -1);
55
next_arg += 2;
56
nmov += 1;
57
+ } else {
58
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
59
+ ldst->addrlo_reg, ldst->addrhi_reg);
60
+ next_arg += n;
61
+ nmov += n;
25
}
62
}
26
63
27
ctx->z_mask = 1;
64
/* Handle data argument. */
28
+ ctx->s_mask = smask_from_zmask(1);
65
@@ -XXX,XX +XXX,XX @@ static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
29
return false;
66
g_assert_not_reached();
30
67
}
31
do_setcond_const:
68
69
- if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 32) {
70
+ if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
71
+ /* Zero extend the address by loading a zero for the high part. */
72
loc = &info->in[1 + !HOST_BIG_ENDIAN];
73
tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
74
}
32
--
75
--
33
2.25.1
76
2.34.1
34
77
35
78
New patch

Disconnect guest page size from TCG compilation.
While this could be done via exec/target_page.h, we want to cache
the value across multiple memory access operations, so we might
as well initialize this early.

The changes within tcg/ are entirely mechanical:

sed -i s/TARGET_PAGE_BITS/s->page_bits/g
sed -i s/TARGET_PAGE_MASK/s->page_mask/g

Reviewed-by: Anton Johansson <anjo@rev.ng>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg.h | 5 +++++
accel/tcg/translate-all.c | 4 ++++
tcg/aarch64/tcg-target.c.inc | 6 +++---
tcg/arm/tcg-target.c.inc | 10 +++++-----
tcg/i386/tcg-target.c.inc | 6 +++---
tcg/loongarch64/tcg-target.c.inc | 4 ++--
tcg/mips/tcg-target.c.inc | 6 +++---
tcg/ppc/tcg-target.c.inc | 14 +++++++-------
tcg/riscv/tcg-target.c.inc | 4 ++--
tcg/s390x/tcg-target.c.inc | 4 ++--
tcg/sparc64/tcg-target.c.inc | 4 ++--
11 files changed, 38 insertions(+), 29 deletions(-)

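As a rough illustration of how the cached fields get used (example numbers, not taken from any particular guest): the softmmu fast path builds compare_mask = page_mask | a_mask and ANDs it with the address, as the backend hunks below do with s->page_mask.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Example values standing in for the fields now cached on the context. */
    int page_bits = 12;                              /* e.g. 4 KiB pages */
    uint64_t page_mask = ~((1ull << page_bits) - 1);
    uint64_t a_mask = 3;                             /* require 4-byte alignment */
    uint64_t addr = 0x0000123456789abcull;

    /* One AND strips the in-page offset while keeping the low alignment
       bits, which is what gets compared against the TLB entry. */
    uint64_t compare_mask = page_mask | a_mask;
    printf("%016" PRIx64 "\n", addr & compare_mask);
    return 0;
}
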
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/include/tcg/tcg.h
30
+++ b/include/tcg/tcg.h
31
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
32
int nb_ops;
33
TCGType addr_type; /* TCG_TYPE_I32 or TCG_TYPE_I64 */
34
35
+#ifdef CONFIG_SOFTMMU
36
+ int page_mask;
37
+ uint8_t page_bits;
38
+#endif
39
+
40
TCGRegSet reserved_regs;
41
intptr_t current_frame_offset;
42
intptr_t frame_start;
43
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/accel/tcg/translate-all.c
46
+++ b/accel/tcg/translate-all.c
47
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
48
tb_set_page_addr1(tb, -1);
49
tcg_ctx->gen_tb = tb;
50
tcg_ctx->addr_type = TCG_TYPE_TL;
51
+#ifdef CONFIG_SOFTMMU
52
+ tcg_ctx->page_bits = TARGET_PAGE_BITS;
53
+ tcg_ctx->page_mask = TARGET_PAGE_MASK;
54
+#endif
55
56
tb_overflow:
57
58
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
59
index XXXXXXX..XXXXXXX 100644
60
--- a/tcg/aarch64/tcg-target.c.inc
61
+++ b/tcg/aarch64/tcg-target.c.inc
62
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
63
ldst->oi = oi;
64
ldst->addrlo_reg = addr_reg;
65
66
- mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
67
+ mask_type = (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32
68
? TCG_TYPE_I64 : TCG_TYPE_I32);
69
70
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
71
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
72
/* Extract the TLB index from the address into X0. */
73
tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
74
TCG_REG_X0, TCG_REG_X0, addr_reg,
75
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
76
+ s->page_bits - CPU_TLB_ENTRY_BITS);
77
78
/* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
79
tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
80
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
81
TCG_REG_X3, addr_reg, s_mask - a_mask);
82
x3 = TCG_REG_X3;
83
}
84
- compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
85
+ compare_mask = (uint64_t)s->page_mask | a_mask;
86
87
/* Store the page mask part of the address into X3. */
88
tcg_out_logicali(s, I3404_ANDI, addr_type, TCG_REG_X3, x3, compare_mask);
89
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
90
index XXXXXXX..XXXXXXX 100644
91
--- a/tcg/arm/tcg-target.c.inc
92
+++ b/tcg/arm/tcg-target.c.inc
93
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
94
95
/* Extract the tlb index from the address into R0. */
96
tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
97
- SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
98
+ SHIFT_IMM_LSR(s->page_bits - CPU_TLB_ENTRY_BITS));
99
100
/*
101
* Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
102
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
103
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
104
addrlo, s_mask - a_mask);
105
}
106
- if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
107
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
108
+ if (use_armv7_instructions && s->page_bits <= 16) {
109
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(s->page_mask | a_mask));
110
tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
111
t_addr, TCG_REG_TMP, 0);
112
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
113
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
114
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
115
}
116
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
117
- SHIFT_IMM_LSR(TARGET_PAGE_BITS));
118
+ SHIFT_IMM_LSR(s->page_bits));
119
tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
120
0, TCG_REG_R2, TCG_REG_TMP,
121
- SHIFT_IMM_LSL(TARGET_PAGE_BITS));
122
+ SHIFT_IMM_LSL(s->page_bits));
123
}
124
125
if (s->addr_type != TCG_TYPE_I32) {
126
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
127
index XXXXXXX..XXXXXXX 100644
128
--- a/tcg/i386/tcg-target.c.inc
129
+++ b/tcg/i386/tcg-target.c.inc
130
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
131
trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
132
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
133
hrexw = P_REXW;
134
- if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
135
+ if (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32) {
136
tlbtype = TCG_TYPE_I64;
137
tlbrexw = P_REXW;
138
}
139
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
140
141
tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
142
tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
143
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
144
+ s->page_bits - CPU_TLB_ENTRY_BITS);
145
146
tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
147
TLB_MASK_TABLE_OFS(mem_index) +
148
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
149
tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
150
addrlo, s_mask - a_mask);
151
}
152
- tlb_mask = TARGET_PAGE_MASK | a_mask;
153
+ tlb_mask = s->page_mask | a_mask;
154
tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
155
156
/* cmp 0(TCG_REG_L0), TCG_REG_L1 */
157
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
158
index XXXXXXX..XXXXXXX 100644
159
--- a/tcg/loongarch64/tcg-target.c.inc
160
+++ b/tcg/loongarch64/tcg-target.c.inc
161
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
162
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
163
164
tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
165
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
166
+ s->page_bits - CPU_TLB_ENTRY_BITS);
167
tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
168
tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
169
170
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
171
tcg_out_mov(s, addr_type, TCG_REG_TMP1, addr_reg);
172
}
173
tcg_out_opc_bstrins_d(s, TCG_REG_TMP1, TCG_REG_ZERO,
174
- a_bits, TARGET_PAGE_BITS - 1);
175
+ a_bits, s->page_bits - 1);
176
177
/* Compare masked address with the TLB entry. */
178
ldst->label_ptr[0] = s->code_ptr;
179
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
180
index XXXXXXX..XXXXXXX 100644
181
--- a/tcg/mips/tcg-target.c.inc
182
+++ b/tcg/mips/tcg-target.c.inc
183
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
184
/* Extract the TLB index from the address into TMP3. */
185
if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
186
tcg_out_opc_sa(s, OPC_SRL, TCG_TMP3, addrlo,
187
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
188
+ s->page_bits - CPU_TLB_ENTRY_BITS);
189
} else {
190
tcg_out_dsrl(s, TCG_TMP3, addrlo,
191
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
192
+ s->page_bits - CPU_TLB_ENTRY_BITS);
193
}
194
tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
195
196
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
197
* For unaligned accesses, compare against the end of the access to
198
* verify that it does not cross a page boundary.
199
*/
200
- tcg_out_movi(s, addr_type, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
201
+ tcg_out_movi(s, addr_type, TCG_TMP1, s->page_mask | a_mask);
202
if (a_mask < s_mask) {
203
if (TCG_TARGET_REG_BITS == 32 || addr_type == TCG_TYPE_I32) {
204
tcg_out_opc_imm(s, OPC_ADDIU, TCG_TMP2, addrlo, s_mask - a_mask);
205
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
206
index XXXXXXX..XXXXXXX 100644
207
--- a/tcg/ppc/tcg-target.c.inc
208
+++ b/tcg/ppc/tcg-target.c.inc
209
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
210
/* Extract the page index, shifted into place for tlb index. */
211
if (TCG_TARGET_REG_BITS == 32) {
212
tcg_out_shri32(s, TCG_REG_R0, addrlo,
213
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
214
+ s->page_bits - CPU_TLB_ENTRY_BITS);
215
} else {
216
tcg_out_shri64(s, TCG_REG_R0, addrlo,
217
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
218
+ s->page_bits - CPU_TLB_ENTRY_BITS);
219
}
220
tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
221
222
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
223
a_bits = s_bits;
224
}
225
tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
226
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
227
+ (32 - a_bits) & 31, 31 - s->page_bits);
228
} else {
229
TCGReg t = addrlo;
230
231
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
232
/* Mask the address for the requested alignment. */
233
if (TARGET_LONG_BITS == 32) {
234
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
235
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
236
+ (32 - a_bits) & 31, 31 - s->page_bits);
237
} else if (a_bits == 0) {
238
- tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
239
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - s->page_bits);
240
} else {
241
tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
242
- 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
243
- tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
244
+ 64 - s->page_bits, s->page_bits - a_bits);
245
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, s->page_bits, 0);
246
}
247
}
248
249
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
250
index XXXXXXX..XXXXXXX 100644
251
--- a/tcg/riscv/tcg-target.c.inc
252
+++ b/tcg/riscv/tcg-target.c.inc
253
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
254
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
255
256
tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
257
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
258
+ s->page_bits - CPU_TLB_ENTRY_BITS);
259
tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
260
tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
261
262
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
263
tcg_out_opc_imm(s, TARGET_LONG_BITS == 32 ? OPC_ADDIW : OPC_ADDI,
264
addr_adj, addr_reg, s_mask - a_mask);
265
}
266
- compare_mask = TARGET_PAGE_MASK | a_mask;
267
+ compare_mask = s->page_mask | a_mask;
268
if (compare_mask == sextreg(compare_mask, 0, 12)) {
269
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_adj, compare_mask);
270
} else {
271
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
272
index XXXXXXX..XXXXXXX 100644
273
--- a/tcg/s390x/tcg-target.c.inc
274
+++ b/tcg/s390x/tcg-target.c.inc
275
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
276
ldst->addrlo_reg = addr_reg;
277
278
tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
279
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
280
+ s->page_bits - CPU_TLB_ENTRY_BITS);
281
282
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
283
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
284
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
285
* cross pages using the address of the last byte of the access.
286
*/
287
a_off = (a_mask >= s_mask ? 0 : s_mask - a_mask);
288
- tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
289
+ tlb_mask = (uint64_t)s->page_mask | a_mask;
290
if (a_off == 0) {
291
tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
292
} else {
293
diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
294
index XXXXXXX..XXXXXXX 100644
295
--- a/tcg/sparc64/tcg-target.c.inc
296
+++ b/tcg/sparc64/tcg-target.c.inc
297
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
298
299
/* Extract the page index, shifted into place for tlb index. */
300
tcg_out_arithi(s, TCG_REG_T1, addr_reg,
301
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
302
+ s->page_bits - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
303
tcg_out_arith(s, TCG_REG_T1, TCG_REG_T1, TCG_REG_T2, ARITH_AND);
304
305
/* Add the tlb_table pointer, creating the CPUTLBEntry address into R2. */
306
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
307
h->base = TCG_REG_T1;
308
309
/* Mask out the page offset, except for the required alignment. */
310
- compare_mask = TARGET_PAGE_MASK | a_mask;
311
+ compare_mask = s->page_mask | a_mask;
312
if (check_fit_tl(compare_mask, 13)) {
313
tcg_out_arithi(s, TCG_REG_T3, addr_reg, compare_mask, ARITH_AND);
314
} else {
315
--
316
2.34.1
New patch

Disconnect guest tlb parameters from TCG compilation.

Reviewed-by: Anton Johansson <anjo@rev.ng>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/tcg/tcg.h | 1 +
accel/tcg/translate-all.c | 1 +
tcg/aarch64/tcg-target.c.inc | 2 +-
tcg/i386/tcg-target.c.inc | 2 +-
4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@ struct TCGContext {
#ifdef CONFIG_SOFTMMU
int page_mask;
uint8_t page_bits;
+ uint8_t tlb_dyn_max_bits;
#endif

TCGRegSet reserved_regs;
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_gen_code(CPUState *cpu,
#ifdef CONFIG_SOFTMMU
tcg_ctx->page_bits = TARGET_PAGE_BITS;
tcg_ctx->page_mask = TARGET_PAGE_MASK;
+ tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
#endif

tb_overflow:

diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
ldst->oi = oi;
ldst->addrlo_reg = addr_reg;

- mask_type = (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32
+ mask_type = (s->page_bits + s->tlb_dyn_max_bits > 32
? TCG_TYPE_I64 : TCG_TYPE_I32);
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
trexw = (ttype == TCG_TYPE_I32 ? 0 : P_REXW);
if (TCG_TYPE_PTR == TCG_TYPE_I64) {
hrexw = P_REXW;
- if (s->page_bits + CPU_TLB_DYN_MAX_BITS > 32) {
+ if (s->page_bits + s->tlb_dyn_max_bits > 32) {
tlbtype = TCG_TYPE_I64;
tlbrexw = P_REXW;
}
--
2.34.1

New patch

TCG will need this declaration, without all of the other
bits that come with cpu-all.h.

Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu-all.h | 5 +----
include/exec/user/guest-base.h | 12 ++++++++++++
tcg/tcg.c | 3 +++
3 files changed, 16 insertions(+), 4 deletions(-)
create mode 100644 include/exec/user/guest-base.h

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index XXXXXXX..XXXXXXX 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -XXX,XX +XXX,XX @@

#if defined(CONFIG_USER_ONLY)
#include "exec/user/abitypes.h"
+#include "exec/user/guest-base.h"

-/* On some host systems the guest address space is reserved on the host.
- * This allows the guest address space to be offset to a convenient location.
- */
-extern uintptr_t guest_base;
extern bool have_guest_base;

/*
diff --git a/include/exec/user/guest-base.h b/include/exec/user/guest-base.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/include/exec/user/guest-base.h
@@ -XXX,XX +XXX,XX @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Declaration of guest_base.
+ * Copyright (c) 2003 Fabrice Bellard
+ */
+
+#ifndef EXEC_USER_GUEST_BASE_H
+#define EXEC_USER_GUEST_BASE_H
+
+extern uintptr_t guest_base;
+
+#endif
diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "accel/tcg/perf.h"
+#ifdef CONFIG_USER_ONLY
+#include "exec/user/guest-base.h"
+#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
used here. */
--
2.34.1
