The following changes since commit 36eae3a732a1f2aa81391e871ac0e9bb3233e7d7:

  Merge remote-tracking branch 'remotes/dgilbert-gitlab/tags/pull-migration-20220302b' into staging (2022-03-02 20:55:48 +0000)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20220303

for you to fetch changes up to f23e6de25c31cadd9a3b7122f9384e6b259ce37f:

  tcg/loongarch64: Support TCG_TARGET_SIGNED_ADDR32 (2022-03-03 10:47:20 -1000)

----------------------------------------------------------------
Reorder do_constant_folding_cond test to satisfy valgrind.
Fix value of MAX_OPC_PARAM_IARGS.
Add opcodes for vector nand, nor, eqv.
Support vector nand, nor, eqv on PPC and S390X hosts.
Support AVX512VL, AVX512BW, AVX512DQ, and AVX512VBMI2.
Support 32-bit guest addresses as signed values.

----------------------------------------------------------------
Alex Bennée (1):
      tcg/optimize: only read val after const check

Richard Henderson (28):
      tcg: Add opcodes for vector nand, nor, eqv
      tcg/ppc: Implement vector NAND, NOR, EQV
      tcg/s390x: Implement vector NAND, NOR, EQV
      tcg/i386: Detect AVX512
      tcg/i386: Add tcg_out_evex_opc
      tcg/i386: Use tcg_can_emit_vec_op in expand_vec_cmp_noinv
      tcg/i386: Implement avx512 variable shifts
      tcg/i386: Implement avx512 scalar shift
      tcg/i386: Implement avx512 immediate sari shift
      tcg/i386: Implement avx512 immediate rotate
      tcg/i386: Implement avx512 variable rotate
      tcg/i386: Support avx512vbmi2 vector shift-double instructions
      tcg/i386: Expand vector word rotate as avx512vbmi2 shift-double
      tcg/i386: Remove rotls_vec from tcg_target_op_def
      tcg/i386: Expand scalar rotate with avx512 insns
      tcg/i386: Implement avx512 min/max/abs
      tcg/i386: Implement avx512 multiply
      tcg/i386: Implement more logical operations for avx512
      tcg/i386: Implement bitsel for avx512
      tcg: Add TCG_TARGET_SIGNED_ADDR32
      accel/tcg: Split out g2h_tlbe
      accel/tcg: Support TCG_TARGET_SIGNED_ADDR32 for softmmu
      accel/tcg: Add guest_base_signed_addr32 for user-only
      linux-user: Support TCG_TARGET_SIGNED_ADDR32
      tcg/aarch64: Support TCG_TARGET_SIGNED_ADDR32
      tcg/mips: Support TCG_TARGET_SIGNED_ADDR32
      tcg/riscv: Support TCG_TARGET_SIGNED_ADDR32
      tcg/loongarch64: Support TCG_TARGET_SIGNED_ADDR32

Ziqiao Kong (1):
      tcg: Set MAX_OPC_PARAM_IARGS to 7

 include/exec/cpu-all.h            |  20 +-
 include/exec/cpu_ldst.h           |   3 +-
 include/qemu/cpuid.h              |  20 +-
 include/tcg/tcg-opc.h             |   3 +
 include/tcg/tcg.h                 |   5 +-
 tcg/aarch64/tcg-target-sa32.h     |   7 +
 tcg/aarch64/tcg-target.h          |   3 +
 tcg/arm/tcg-target-sa32.h         |   1 +
 tcg/arm/tcg-target.h              |   3 +
 tcg/i386/tcg-target-con-set.h     |   1 +
 tcg/i386/tcg-target-sa32.h        |   1 +
 tcg/i386/tcg-target.h             |  17 +-
 tcg/i386/tcg-target.opc.h         |   3 +
 tcg/loongarch64/tcg-target-sa32.h |   1 +
 tcg/mips/tcg-target-sa32.h        |   9 +
 tcg/ppc/tcg-target-sa32.h         |   1 +
 tcg/ppc/tcg-target.h              |   3 +
 tcg/riscv/tcg-target-sa32.h       |   5 +
 tcg/s390x/tcg-target-sa32.h       |   1 +
 tcg/s390x/tcg-target.h            |   3 +
 tcg/sparc/tcg-target-sa32.h       |   1 +
 tcg/tci/tcg-target-sa32.h         |   1 +
 accel/tcg/cputlb.c                |  36 ++--
 bsd-user/main.c                   |   4 +
 linux-user/elfload.c              |  62 ++++--
 linux-user/main.c                 |   3 +
 tcg/optimize.c                    |  20 +-
 tcg/tcg-op-vec.c                  |  27 ++-
 tcg/tcg.c                         |  10 +
 tcg/aarch64/tcg-target.c.inc      |  81 +++++---
 tcg/i386/tcg-target.c.inc         | 387 +++++++++++++++++++++++++++++++-------
 tcg/loongarch64/tcg-target.c.inc  |  15 +-
 tcg/mips/tcg-target.c.inc         |  10 +-
 tcg/ppc/tcg-target.c.inc          |  15 ++
 tcg/riscv/tcg-target.c.inc        |   8 +-
 tcg/s390x/tcg-target.c.inc        |  17 ++
 tcg/tci/tcg-target.c.inc          |   2 +-
 37 files changed, 640 insertions(+), 169 deletions(-)
 create mode 100644 tcg/aarch64/tcg-target-sa32.h
 create mode 100644 tcg/arm/tcg-target-sa32.h
 create mode 100644 tcg/i386/tcg-target-sa32.h
 create mode 100644 tcg/loongarch64/tcg-target-sa32.h
 create mode 100644 tcg/mips/tcg-target-sa32.h
 create mode 100644 tcg/ppc/tcg-target-sa32.h
 create mode 100644 tcg/riscv/tcg-target-sa32.h
 create mode 100644 tcg/s390x/tcg-target-sa32.h
 create mode 100644 tcg/sparc/tcg-target-sa32.h
 create mode 100644 tcg/tci/tcg-target-sa32.h
From: Alex Bennée <alex.bennee@linaro.org>

valgrind pointed out that arg_info()->val can be undefined which will
be the case if the arguments are not constant. The ordering of the
checks will have ensured we never relied on an undefined value but for
the sake of completeness re-order the code to be clear.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20220209112142.3367525-1-alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
 static int do_constant_folding_cond(TCGType type, TCGArg x,
                                     TCGArg y, TCGCond c)
 {
-    uint64_t xv = arg_info(x)->val;
-    uint64_t yv = arg_info(y)->val;
-
     if (arg_is_const(x) && arg_is_const(y)) {
+        uint64_t xv = arg_info(x)->val;
+        uint64_t yv = arg_info(y)->val;
+
         switch (type) {
         case TCG_TYPE_I32:
             return do_constant_folding_cond_32(xv, yv, c);
@@ -XXX,XX +XXX,XX @@ static int do_constant_folding_cond(TCGType type, TCGArg x,
         }
     } else if (args_are_copies(x, y)) {
         return do_constant_folding_cond_eq(c);
-    } else if (arg_is_const(y) && yv == 0) {
+    } else if (arg_is_const(y) && arg_info(y)->val == 0) {
        switch (c) {
        case TCG_COND_LTU:
            return 0;
--
2.25.1
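For readers outside the TCG code base, the rule the patch above enforces can be shown with a stand-alone sketch (the names and types here are invented for illustration, not QEMU APIs): a field that is only valid behind a discriminator must not be read until the discriminator has been checked, otherwise valgrind reports a use of an undefined value even when the result is never consumed.

    /* Illustrative sketch only -- not part of this series. */
    #include <stdbool.h>
    #include <stdint.h>

    typedef struct {
        bool is_const;
        uint64_t val;                   /* undefined unless is_const is set */
    } ArgInfo;

    static int fold_cond(const ArgInfo *x, const ArgInfo *y)
    {
        if (x->is_const && y->is_const) {
            uint64_t xv = x->val;       /* read only after the check */
            uint64_t yv = y->val;
            return xv == yv;
        }
        return -1;                      /* not foldable */
    }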
From: Ziqiao Kong <ziqiaokong@gmail.com>

The last entry of DEF_HELPERS_FLAGS_n is DEF_HELPER_FLAGS_7 and
thus the MAX_OPC_PARAM_IARGS should be 7.

Reviewed-by: Taylor Simpson <tsimpson@quicinc.com>
Signed-off-by: Ziqiao Kong <ziqiaokong@gmail.com>
Message-Id: <20220227113127.414533-2-ziqiaokong@gmail.com>
Fixes: e6cadf49c3d ("tcg: Add support for a helper with 7 arguments")
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/tcg/tcg.h        | 2 +-
 tcg/tci/tcg-target.c.inc | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index XXXXXXX..XXXXXXX 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -XXX,XX +XXX,XX @@
 #else
 #define MAX_OPC_PARAM_PER_ARG 1
 #endif
-#define MAX_OPC_PARAM_IARGS 6
+#define MAX_OPC_PARAM_IARGS 7
 #define MAX_OPC_PARAM_OARGS 1
 #define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)

diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tci/tcg-target.c.inc
+++ b/tcg/tci/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_R0,
 };

-#if MAX_OPC_PARAM_IARGS != 6
+#if MAX_OPC_PARAM_IARGS != 7
 # error Fix needed, number of supported input arguments changed!
 #endif

--
2.25.1
1
We've had placeholders for these opcodes for a while,
2
and should have support on ppc, s390x and avx512 hosts.
3
4
Tested-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
include/tcg/tcg-opc.h | 3 +++
10
include/tcg/tcg.h | 3 +++
11
tcg/aarch64/tcg-target.h | 3 +++
12
tcg/arm/tcg-target.h | 3 +++
13
tcg/i386/tcg-target.h | 3 +++
14
tcg/ppc/tcg-target.h | 3 +++
15
tcg/s390x/tcg-target.h | 3 +++
16
tcg/optimize.c | 12 ++++++------
17
tcg/tcg-op-vec.c | 27 ++++++++++++++++++---------
18
tcg/tcg.c | 6 ++++++
19
10 files changed, 51 insertions(+), 15 deletions(-)
20
21
diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h
22
index XXXXXXX..XXXXXXX 100644
23
--- a/include/tcg/tcg-opc.h
24
+++ b/include/tcg/tcg-opc.h
25
@@ -XXX,XX +XXX,XX @@ DEF(or_vec, 1, 2, 0, IMPLVEC)
26
DEF(xor_vec, 1, 2, 0, IMPLVEC)
27
DEF(andc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_andc_vec))
28
DEF(orc_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_orc_vec))
29
+DEF(nand_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nand_vec))
30
+DEF(nor_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_nor_vec))
31
+DEF(eqv_vec, 1, 2, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_eqv_vec))
32
DEF(not_vec, 1, 1, 0, IMPLVEC | IMPL(TCG_TARGET_HAS_not_vec))
33
34
DEF(shli_vec, 1, 1, 1, IMPLVEC | IMPL(TCG_TARGET_HAS_shi_vec))
35
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
36
index XXXXXXX..XXXXXXX 100644
37
--- a/include/tcg/tcg.h
38
+++ b/include/tcg/tcg.h
39
@@ -XXX,XX +XXX,XX @@ typedef uint64_t TCGRegSet;
40
#define TCG_TARGET_HAS_not_vec 0
41
#define TCG_TARGET_HAS_andc_vec 0
42
#define TCG_TARGET_HAS_orc_vec 0
43
+#define TCG_TARGET_HAS_nand_vec 0
44
+#define TCG_TARGET_HAS_nor_vec 0
45
+#define TCG_TARGET_HAS_eqv_vec 0
46
#define TCG_TARGET_HAS_roti_vec 0
47
#define TCG_TARGET_HAS_rots_vec 0
48
#define TCG_TARGET_HAS_rotv_vec 0
49
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
50
index XXXXXXX..XXXXXXX 100644
51
--- a/tcg/aarch64/tcg-target.h
52
+++ b/tcg/aarch64/tcg-target.h
53
@@ -XXX,XX +XXX,XX @@ typedef enum {
54
55
#define TCG_TARGET_HAS_andc_vec 1
56
#define TCG_TARGET_HAS_orc_vec 1
57
+#define TCG_TARGET_HAS_nand_vec 0
58
+#define TCG_TARGET_HAS_nor_vec 0
59
+#define TCG_TARGET_HAS_eqv_vec 0
60
#define TCG_TARGET_HAS_not_vec 1
61
#define TCG_TARGET_HAS_neg_vec 1
62
#define TCG_TARGET_HAS_abs_vec 1
63
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
64
index XXXXXXX..XXXXXXX 100644
65
--- a/tcg/arm/tcg-target.h
66
+++ b/tcg/arm/tcg-target.h
67
@@ -XXX,XX +XXX,XX @@ extern bool use_neon_instructions;
68
69
#define TCG_TARGET_HAS_andc_vec 1
70
#define TCG_TARGET_HAS_orc_vec 1
71
+#define TCG_TARGET_HAS_nand_vec 0
72
+#define TCG_TARGET_HAS_nor_vec 0
73
+#define TCG_TARGET_HAS_eqv_vec 0
74
#define TCG_TARGET_HAS_not_vec 1
75
#define TCG_TARGET_HAS_neg_vec 1
76
#define TCG_TARGET_HAS_abs_vec 1
77
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
78
index XXXXXXX..XXXXXXX 100644
79
--- a/tcg/i386/tcg-target.h
80
+++ b/tcg/i386/tcg-target.h
81
@@ -XXX,XX +XXX,XX @@ extern bool have_movbe;
82
83
#define TCG_TARGET_HAS_andc_vec 1
84
#define TCG_TARGET_HAS_orc_vec 0
85
+#define TCG_TARGET_HAS_nand_vec 0
86
+#define TCG_TARGET_HAS_nor_vec 0
87
+#define TCG_TARGET_HAS_eqv_vec 0
88
#define TCG_TARGET_HAS_not_vec 0
89
#define TCG_TARGET_HAS_neg_vec 0
90
#define TCG_TARGET_HAS_abs_vec 1
91
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
92
index XXXXXXX..XXXXXXX 100644
93
--- a/tcg/ppc/tcg-target.h
94
+++ b/tcg/ppc/tcg-target.h
95
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
96
97
#define TCG_TARGET_HAS_andc_vec 1
98
#define TCG_TARGET_HAS_orc_vec have_isa_2_07
99
+#define TCG_TARGET_HAS_nand_vec 0
100
+#define TCG_TARGET_HAS_nor_vec 0
101
+#define TCG_TARGET_HAS_eqv_vec 0
102
#define TCG_TARGET_HAS_not_vec 1
103
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
104
#define TCG_TARGET_HAS_abs_vec 0
105
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
106
index XXXXXXX..XXXXXXX 100644
107
--- a/tcg/s390x/tcg-target.h
108
+++ b/tcg/s390x/tcg-target.h
109
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
110
111
#define TCG_TARGET_HAS_andc_vec 1
112
#define TCG_TARGET_HAS_orc_vec HAVE_FACILITY(VECTOR_ENH1)
113
+#define TCG_TARGET_HAS_nand_vec 0
114
+#define TCG_TARGET_HAS_nor_vec 0
115
+#define TCG_TARGET_HAS_eqv_vec 0
116
#define TCG_TARGET_HAS_not_vec 1
117
#define TCG_TARGET_HAS_neg_vec 1
118
#define TCG_TARGET_HAS_abs_vec 1
119
diff --git a/tcg/optimize.c b/tcg/optimize.c
120
index XXXXXXX..XXXXXXX 100644
121
--- a/tcg/optimize.c
122
+++ b/tcg/optimize.c
123
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
124
CASE_OP_32_64_VEC(orc):
125
return x | ~y;
126
127
- CASE_OP_32_64(eqv):
128
+ CASE_OP_32_64_VEC(eqv):
129
return ~(x ^ y);
130
131
- CASE_OP_32_64(nand):
132
+ CASE_OP_32_64_VEC(nand):
133
return ~(x & y);
134
135
- CASE_OP_32_64(nor):
136
+ CASE_OP_32_64_VEC(nor):
137
return ~(x | y);
138
139
case INDEX_op_clz_i32:
140
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
141
case INDEX_op_dup2_vec:
142
done = fold_dup2(&ctx, op);
143
break;
144
- CASE_OP_32_64(eqv):
145
+ CASE_OP_32_64_VEC(eqv):
146
done = fold_eqv(&ctx, op);
147
break;
148
CASE_OP_32_64(extract):
149
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
150
CASE_OP_32_64(mulu2):
151
done = fold_multiply2(&ctx, op);
152
break;
153
- CASE_OP_32_64(nand):
154
+ CASE_OP_32_64_VEC(nand):
155
done = fold_nand(&ctx, op);
156
break;
157
CASE_OP_32_64(neg):
158
done = fold_neg(&ctx, op);
159
break;
160
- CASE_OP_32_64(nor):
161
+ CASE_OP_32_64_VEC(nor):
162
done = fold_nor(&ctx, op);
163
break;
164
CASE_OP_32_64_VEC(not):
165
diff --git a/tcg/tcg-op-vec.c b/tcg/tcg-op-vec.c
166
index XXXXXXX..XXXXXXX 100644
167
--- a/tcg/tcg-op-vec.c
168
+++ b/tcg/tcg-op-vec.c
169
@@ -XXX,XX +XXX,XX @@ void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
170
171
void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
172
{
173
- /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend supports it. */
174
- tcg_gen_and_vec(0, r, a, b);
175
- tcg_gen_not_vec(0, r, r);
176
+ if (TCG_TARGET_HAS_nand_vec) {
177
+ vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b);
178
+ } else {
179
+ tcg_gen_and_vec(0, r, a, b);
180
+ tcg_gen_not_vec(0, r, r);
181
+ }
182
}
183
184
void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
185
{
186
- /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend supports it. */
187
- tcg_gen_or_vec(0, r, a, b);
188
- tcg_gen_not_vec(0, r, r);
189
+ if (TCG_TARGET_HAS_nor_vec) {
190
+ vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b);
191
+ } else {
192
+ tcg_gen_or_vec(0, r, a, b);
193
+ tcg_gen_not_vec(0, r, r);
194
+ }
195
}
196
197
void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
198
{
199
- /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend supports it. */
200
- tcg_gen_xor_vec(0, r, a, b);
201
- tcg_gen_not_vec(0, r, r);
202
+ if (TCG_TARGET_HAS_eqv_vec) {
203
+ vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b);
204
+ } else {
205
+ tcg_gen_xor_vec(0, r, a, b);
206
+ tcg_gen_not_vec(0, r, r);
207
+ }
208
}
209
210
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
211
diff --git a/tcg/tcg.c b/tcg/tcg.c
212
index XXXXXXX..XXXXXXX 100644
213
--- a/tcg/tcg.c
214
+++ b/tcg/tcg.c
215
@@ -XXX,XX +XXX,XX @@ bool tcg_op_supported(TCGOpcode op)
216
return have_vec && TCG_TARGET_HAS_andc_vec;
217
case INDEX_op_orc_vec:
218
return have_vec && TCG_TARGET_HAS_orc_vec;
219
+ case INDEX_op_nand_vec:
220
+ return have_vec && TCG_TARGET_HAS_nand_vec;
221
+ case INDEX_op_nor_vec:
222
+ return have_vec && TCG_TARGET_HAS_nor_vec;
223
+ case INDEX_op_eqv_vec:
224
+ return have_vec && TCG_TARGET_HAS_eqv_vec;
225
case INDEX_op_mul_vec:
226
return have_vec && TCG_TARGET_HAS_mul_vec;
227
case INDEX_op_shli_vec:
228
--
229
2.25.1
230
231
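As background for the PPC and S390X backend patches that follow, the generic fallback added above expands each of the new opcodes as one logical operation plus a NOT. A minimal scalar illustration of the identities (plain C, not QEMU code):

    #include <stdint.h>

    static uint64_t nand64(uint64_t a, uint64_t b) { return ~(a & b); }
    static uint64_t nor64(uint64_t a, uint64_t b)  { return ~(a | b); }
    static uint64_t eqv64(uint64_t a, uint64_t b)  { return ~(a ^ b); }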
1
Tested-by: Alex Bennée <alex.bennee@linaro.org>
2
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/ppc/tcg-target.h | 6 +++---
7
tcg/ppc/tcg-target.c.inc | 15 +++++++++++++++
8
2 files changed, 18 insertions(+), 3 deletions(-)
1
9
10
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/ppc/tcg-target.h
13
+++ b/tcg/ppc/tcg-target.h
14
@@ -XXX,XX +XXX,XX @@ extern bool have_vsx;
15
16
#define TCG_TARGET_HAS_andc_vec 1
17
#define TCG_TARGET_HAS_orc_vec have_isa_2_07
18
-#define TCG_TARGET_HAS_nand_vec 0
19
-#define TCG_TARGET_HAS_nor_vec 0
20
-#define TCG_TARGET_HAS_eqv_vec 0
21
+#define TCG_TARGET_HAS_nand_vec have_isa_2_07
22
+#define TCG_TARGET_HAS_nor_vec 1
23
+#define TCG_TARGET_HAS_eqv_vec have_isa_2_07
24
#define TCG_TARGET_HAS_not_vec 1
25
#define TCG_TARGET_HAS_neg_vec have_isa_3_00
26
#define TCG_TARGET_HAS_abs_vec 0
27
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
28
index XXXXXXX..XXXXXXX 100644
29
--- a/tcg/ppc/tcg-target.c.inc
30
+++ b/tcg/ppc/tcg-target.c.inc
31
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
32
case INDEX_op_xor_vec:
33
case INDEX_op_andc_vec:
34
case INDEX_op_not_vec:
35
+ case INDEX_op_nor_vec:
36
+ case INDEX_op_eqv_vec:
37
+ case INDEX_op_nand_vec:
38
return 1;
39
case INDEX_op_orc_vec:
40
return have_isa_2_07;
41
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
42
case INDEX_op_orc_vec:
43
insn = VORC;
44
break;
45
+ case INDEX_op_nand_vec:
46
+ insn = VNAND;
47
+ break;
48
+ case INDEX_op_nor_vec:
49
+ insn = VNOR;
50
+ break;
51
+ case INDEX_op_eqv_vec:
52
+ insn = VEQV;
53
+ break;
54
55
case INDEX_op_cmp_vec:
56
switch (args[3]) {
57
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
58
case INDEX_op_xor_vec:
59
case INDEX_op_andc_vec:
60
case INDEX_op_orc_vec:
61
+ case INDEX_op_nor_vec:
62
+ case INDEX_op_eqv_vec:
63
+ case INDEX_op_nand_vec:
64
case INDEX_op_cmp_vec:
65
case INDEX_op_ssadd_vec:
66
case INDEX_op_sssub_vec:
67
--
68
2.25.1
69
70
1
Tested-by: Alex Bennée <alex.bennee@linaro.org>
2
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/s390x/tcg-target.h | 6 +++---
7
tcg/s390x/tcg-target.c.inc | 17 +++++++++++++++++
8
2 files changed, 20 insertions(+), 3 deletions(-)
1
9
10
diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/s390x/tcg-target.h
13
+++ b/tcg/s390x/tcg-target.h
14
@@ -XXX,XX +XXX,XX @@ extern uint64_t s390_facilities[3];
15
16
#define TCG_TARGET_HAS_andc_vec 1
17
#define TCG_TARGET_HAS_orc_vec HAVE_FACILITY(VECTOR_ENH1)
18
-#define TCG_TARGET_HAS_nand_vec 0
19
-#define TCG_TARGET_HAS_nor_vec 0
20
-#define TCG_TARGET_HAS_eqv_vec 0
21
+#define TCG_TARGET_HAS_nand_vec HAVE_FACILITY(VECTOR_ENH1)
22
+#define TCG_TARGET_HAS_nor_vec 1
23
+#define TCG_TARGET_HAS_eqv_vec HAVE_FACILITY(VECTOR_ENH1)
24
#define TCG_TARGET_HAS_not_vec 1
25
#define TCG_TARGET_HAS_neg_vec 1
26
#define TCG_TARGET_HAS_abs_vec 1
27
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
28
index XXXXXXX..XXXXXXX 100644
29
--- a/tcg/s390x/tcg-target.c.inc
30
+++ b/tcg/s390x/tcg-target.c.inc
31
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
32
VRRc_VMXL = 0xe7fd,
33
VRRc_VN = 0xe768,
34
VRRc_VNC = 0xe769,
35
+ VRRc_VNN = 0xe76e,
36
VRRc_VNO = 0xe76b,
37
+ VRRc_VNX = 0xe76c,
38
VRRc_VO = 0xe76a,
39
VRRc_VOC = 0xe76f,
40
VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
41
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
42
case INDEX_op_xor_vec:
43
tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
44
break;
45
+ case INDEX_op_nand_vec:
46
+ tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
47
+ break;
48
+ case INDEX_op_nor_vec:
49
+ tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
50
+ break;
51
+ case INDEX_op_eqv_vec:
52
+ tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
53
+ break;
54
55
case INDEX_op_shli_vec:
56
tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
57
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
58
case INDEX_op_and_vec:
59
case INDEX_op_andc_vec:
60
case INDEX_op_bitsel_vec:
61
+ case INDEX_op_eqv_vec:
62
+ case INDEX_op_nand_vec:
63
case INDEX_op_neg_vec:
64
+ case INDEX_op_nor_vec:
65
case INDEX_op_not_vec:
66
case INDEX_op_or_vec:
67
case INDEX_op_orc_vec:
68
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
69
case INDEX_op_or_vec:
70
case INDEX_op_orc_vec:
71
case INDEX_op_xor_vec:
72
+ case INDEX_op_nand_vec:
73
+ case INDEX_op_nor_vec:
74
+ case INDEX_op_eqv_vec:
75
case INDEX_op_cmp_vec:
76
case INDEX_op_mul_vec:
77
case INDEX_op_rotlv_vec:
78
--
79
2.25.1
80
81
There are some operation sizes in some subsets of AVX512 that
are missing from previous iterations of AVX.  Detect them.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/cpuid.h      | 20 +++++++++++++++++---
 tcg/i386/tcg-target.h     |  4 ++++
 tcg/i386/tcg-target.c.inc | 24 ++++++++++++++++++++++--
 3 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/cpuid.h
+++ b/include/qemu/cpuid.h
@@ -XXX,XX +XXX,XX @@
 #ifndef bit_AVX2
 #define bit_AVX2        (1 << 5)
 #endif
-#ifndef bit_AVX512F
-#define bit_AVX512F        (1 << 16)
-#endif
 #ifndef bit_BMI2
 #define bit_BMI2        (1 << 8)
 #endif
+#ifndef bit_AVX512F
+#define bit_AVX512F     (1 << 16)
+#endif
+#ifndef bit_AVX512DQ
+#define bit_AVX512DQ    (1 << 17)
+#endif
+#ifndef bit_AVX512BW
+#define bit_AVX512BW    (1 << 30)
+#endif
+#ifndef bit_AVX512VL
+#define bit_AVX512VL    (1u << 31)
+#endif
+
+/* Leaf 7, %ecx */
+#ifndef bit_AVX512VBMI2
+#define bit_AVX512VBMI2 (1 << 6)
+#endif

 /* Leaf 0x80000001, %ecx */
 #ifndef bit_LZCNT
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -XXX,XX +XXX,XX @@ extern bool have_bmi1;
 extern bool have_popcnt;
 extern bool have_avx1;
 extern bool have_avx2;
+extern bool have_avx512bw;
+extern bool have_avx512dq;
+extern bool have_avx512vbmi2;
+extern bool have_avx512vl;
 extern bool have_movbe;

 /* optional instructions */
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ bool have_bmi1;
 bool have_popcnt;
 bool have_avx1;
 bool have_avx2;
+bool have_avx512bw;
+bool have_avx512dq;
+bool have_avx512vbmi2;
+bool have_avx512vl;
 bool have_movbe;

 #ifdef CONFIG_CPUID_H
@@ -XXX,XX +XXX,XX @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
 static void tcg_target_init(TCGContext *s)
 {
 #ifdef CONFIG_CPUID_H
-    unsigned a, b, c, d, b7 = 0;
+    unsigned a, b, c, d, b7 = 0, c7 = 0;
     unsigned max = __get_cpuid_max(0, 0);

     if (max >= 7) {
         /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs. */
-        __cpuid_count(7, 0, a, b7, c, d);
+        __cpuid_count(7, 0, a, b7, c7, d);
         have_bmi1 = (b7 & bit_BMI) != 0;
         have_bmi2 = (b7 & bit_BMI2) != 0;
     }
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
         if ((xcrl & 6) == 6) {
             have_avx1 = (c & bit_AVX) != 0;
             have_avx2 = (b7 & bit_AVX2) != 0;
+
+            /*
+             * There are interesting instructions in AVX512, so long
+             * as we have AVX512VL, which indicates support for EVEX
+             * on sizes smaller than 512 bits.  We are required to
+             * check that OPMASK and all extended ZMM state are enabled
+             * even if we're not using them -- the insns will fault.
+             */
+            if ((xcrl & 0xe0) == 0xe0
+                && (b7 & bit_AVX512F)
+                && (b7 & bit_AVX512VL)) {
+                have_avx512vl = true;
+                have_avx512bw = (b7 & bit_AVX512BW) != 0;
+                have_avx512dq = (b7 & bit_AVX512DQ) != 0;
+                have_avx512vbmi2 = (c7 & bit_AVX512VBMI2) != 0;
+            }
         }
     }
 }
--
2.25.1

The evex encoding is added here, for use in a subsequent patch.

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 51 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
 #define P_SIMDF3        0x20000         /* 0xf3 opcode prefix */
 #define P_SIMDF2        0x40000         /* 0xf2 opcode prefix */
 #define P_VEXL          0x80000         /* Set VEX.L = 1 */
+#define P_EVEX          0x100000        /* Requires EVEX encoding */

 #define OPC_ARITH_EvIz    (0x81)
 #define OPC_ARITH_EvIb    (0x83)
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vex_opc(TCGContext *s, int opc, int r, int v,
     tcg_out8(s, opc);
 }

+static void tcg_out_evex_opc(TCGContext *s, int opc, int r, int v,
+                             int rm, int index)
+{
+    /* The entire 4-byte evex prefix; with R' and V' set. */
+    uint32_t p = 0x08041062;
+    int mm, pp;
+
+    tcg_debug_assert(have_avx512vl);
+
+    /* EVEX.mm */
+    if (opc & P_EXT3A) {
+        mm = 3;
+    } else if (opc & P_EXT38) {
+        mm = 2;
+    } else if (opc & P_EXT) {
+        mm = 1;
+    } else {
+        g_assert_not_reached();
+    }
+
+    /* EVEX.pp */
+    if (opc & P_DATA16) {
+        pp = 1;                          /* 0x66 */
+    } else if (opc & P_SIMDF3) {
+        pp = 2;                          /* 0xf3 */
+    } else if (opc & P_SIMDF2) {
+        pp = 3;                          /* 0xf2 */
+    } else {
+        pp = 0;
+    }
+
+    p = deposit32(p, 8, 2, mm);
+    p = deposit32(p, 13, 1, (rm & 8) == 0);     /* EVEX.RXB.B */
+    p = deposit32(p, 14, 1, (index & 8) == 0);  /* EVEX.RXB.X */
+    p = deposit32(p, 15, 1, (r & 8) == 0);      /* EVEX.RXB.R */
+    p = deposit32(p, 16, 2, pp);
+    p = deposit32(p, 19, 4, ~v);
+    p = deposit32(p, 23, 1, (opc & P_VEXW) != 0);
+    p = deposit32(p, 29, 2, (opc & P_VEXL) != 0);
+
+    tcg_out32(s, p);
+    tcg_out8(s, opc);
+}
+
 static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
 {
-    tcg_out_vex_opc(s, opc, r, v, rm, 0);
+    if (opc & P_EVEX) {
+        tcg_out_evex_opc(s, opc, r, v, rm, 0);
+    } else {
+        tcg_out_vex_opc(s, opc, r, v, rm, 0);
+    }
     tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
 }

--
2.25.1
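For context, the usability check performed by the "tcg/i386: Detect AVX512" patch above can be sketched as a stand-alone C function. This is illustrative only and not QEMU code; the bit positions are the architectural CPUID/XCR0 definitions.

    #include <cpuid.h>
    #include <stdbool.h>
    #include <stdint.h>

    static uint64_t xgetbv0(void)
    {
        uint32_t lo, hi;
        __asm__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(0));
        return ((uint64_t)hi << 32) | lo;
    }

    static bool have_usable_avx512vl(void)
    {
        unsigned a, b, c, d;

        if (!__get_cpuid(1, &a, &b, &c, &d) || !(c & (1u << 27))) {
            return false;               /* no OSXSAVE, cannot read XCR0 */
        }
        if ((xgetbv0() & 0xe6) != 0xe6) {
            return false;               /* SSE/AVX/opmask/ZMM state not enabled */
        }
        if (!__get_cpuid_count(7, 0, &a, &b, &c, &d)) {
            return false;
        }
        return (b & (1u << 16)) && (b & (1u << 31));    /* AVX512F and AVX512VL */
    }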
The condition for UMIN/UMAX availability is about to change;
2
use the canonical version.
1
3
4
Tested-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 8 ++++----
9
1 file changed, 4 insertions(+), 4 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
16
fixup = NEED_SWAP | NEED_INV;
17
break;
18
case TCG_COND_LEU:
19
- if (vece <= MO_32) {
20
+ if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
21
fixup = NEED_UMIN;
22
} else {
23
fixup = NEED_BIAS | NEED_INV;
24
}
25
break;
26
case TCG_COND_GTU:
27
- if (vece <= MO_32) {
28
+ if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece)) {
29
fixup = NEED_UMIN | NEED_INV;
30
} else {
31
fixup = NEED_BIAS;
32
}
33
break;
34
case TCG_COND_GEU:
35
- if (vece <= MO_32) {
36
+ if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
37
fixup = NEED_UMAX;
38
} else {
39
fixup = NEED_BIAS | NEED_SWAP | NEED_INV;
40
}
41
break;
42
case TCG_COND_LTU:
43
- if (vece <= MO_32) {
44
+ if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece)) {
45
fixup = NEED_UMAX | NEED_INV;
46
} else {
47
fixup = NEED_BIAS | NEED_SWAP;
48
--
49
2.25.1
50
51
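The NEED_UMIN/NEED_UMAX fixups in the patch above rest on a simple identity: unsigned a <= b holds exactly when umin(a, b) == a, and a >= b exactly when umax(a, b) == a, so a per-lane min/max followed by a lane-equality compare (optionally inverted) yields the unsigned comparisons. As a scalar illustration (not QEMU code):

    #include <stdbool.h>
    #include <stdint.h>

    static bool leu(uint32_t a, uint32_t b) { return (a < b ? a : b) == a; }
    static bool geu(uint32_t a, uint32_t b) { return (a > b ? a : b) == a; }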
1
AVX512VL has VPSRAVQ, and
2
AVX512BW has VPSLLVW, VPSRAVW, VPSRLVW.
1
3
4
Tested-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 32 ++++++++++++++++++++++++--------
9
1 file changed, 24 insertions(+), 8 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
16
#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
17
#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
18
#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
19
+#define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
20
#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
21
#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
22
+#define OPC_VPSRAVW (0x11 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
23
#define OPC_VPSRAVD (0x46 | P_EXT38 | P_DATA16)
24
+#define OPC_VPSRAVQ (0x46 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
25
+#define OPC_VPSRLVW (0x10 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
26
#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
27
#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
28
#define OPC_VZEROUPPER (0x77 | P_EXT)
29
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
30
OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
31
};
32
static int const shlv_insn[4] = {
33
- /* TODO: AVX512 adds support for MO_16. */
34
- OPC_UD2, OPC_UD2, OPC_VPSLLVD, OPC_VPSLLVQ
35
+ OPC_UD2, OPC_VPSLLVW, OPC_VPSLLVD, OPC_VPSLLVQ
36
};
37
static int const shrv_insn[4] = {
38
- /* TODO: AVX512 adds support for MO_16. */
39
- OPC_UD2, OPC_UD2, OPC_VPSRLVD, OPC_VPSRLVQ
40
+ OPC_UD2, OPC_VPSRLVW, OPC_VPSRLVD, OPC_VPSRLVQ
41
};
42
static int const sarv_insn[4] = {
43
- /* TODO: AVX512 adds support for MO_16, MO_64. */
44
- OPC_UD2, OPC_UD2, OPC_VPSRAVD, OPC_UD2
45
+ OPC_UD2, OPC_VPSRAVW, OPC_VPSRAVD, OPC_VPSRAVQ
46
};
47
static int const shls_insn[4] = {
48
OPC_UD2, OPC_PSLLW, OPC_PSLLD, OPC_PSLLQ
49
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
50
51
case INDEX_op_shlv_vec:
52
case INDEX_op_shrv_vec:
53
- return have_avx2 && vece >= MO_32;
54
+ switch (vece) {
55
+ case MO_16:
56
+ return have_avx512bw;
57
+ case MO_32:
58
+ case MO_64:
59
+ return have_avx2;
60
+ }
61
+ return 0;
62
case INDEX_op_sarv_vec:
63
- return have_avx2 && vece == MO_32;
64
+ switch (vece) {
65
+ case MO_16:
66
+ return have_avx512bw;
67
+ case MO_32:
68
+ return have_avx2;
69
+ case MO_64:
70
+ return have_avx512vl;
71
+ }
72
+ return 0;
73
case INDEX_op_rotlv_vec:
74
case INDEX_op_rotrv_vec:
75
return have_avx2 && vece >= MO_32 ? -1 : 0;
76
--
77
2.25.1
78
79
1
AVX512VL has VPSRAQ.
1
2
3
Tested-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/i386/tcg-target.c.inc | 12 ++++++++++--
8
1 file changed, 10 insertions(+), 2 deletions(-)
9
10
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/i386/tcg-target.c.inc
13
+++ b/tcg/i386/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
15
#define OPC_PSLLQ (0xf3 | P_EXT | P_DATA16)
16
#define OPC_PSRAW (0xe1 | P_EXT | P_DATA16)
17
#define OPC_PSRAD (0xe2 | P_EXT | P_DATA16)
18
+#define OPC_VPSRAQ (0x72 | P_EXT | P_DATA16 | P_VEXW | P_EVEX)
19
#define OPC_PSRLW (0xd1 | P_EXT | P_DATA16)
20
#define OPC_PSRLD (0xd2 | P_EXT | P_DATA16)
21
#define OPC_PSRLQ (0xd3 | P_EXT | P_DATA16)
22
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
23
OPC_UD2, OPC_PSRLW, OPC_PSRLD, OPC_PSRLQ
24
};
25
static int const sars_insn[4] = {
26
- OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_UD2
27
+ OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_VPSRAQ
28
};
29
static int const abs_insn[4] = {
30
/* TODO: AVX512 adds support for MO_64. */
31
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
32
case INDEX_op_shrs_vec:
33
return vece >= MO_16;
34
case INDEX_op_sars_vec:
35
- return vece >= MO_16 && vece <= MO_32;
36
+ switch (vece) {
37
+ case MO_16:
38
+ case MO_32:
39
+ return 1;
40
+ case MO_64:
41
+ return have_avx512vl;
42
+ }
43
+ return 0;
44
case INDEX_op_rotls_vec:
45
return vece >= MO_16 ? -1 : 0;
46
47
--
48
2.25.1
49
50
1
AVX512 has VPSRAQ with immediate operand, in the same form as
2
with AVX, but requires EVEX encoding and W1.
1
3
4
Tested-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 30 +++++++++++++++++++++---------
9
1 file changed, 21 insertions(+), 9 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
16
break;
17
18
case INDEX_op_shli_vec:
19
+ insn = shift_imm_insn[vece];
20
sub = 6;
21
goto gen_shift;
22
case INDEX_op_shri_vec:
23
+ insn = shift_imm_insn[vece];
24
sub = 2;
25
goto gen_shift;
26
case INDEX_op_sari_vec:
27
- tcg_debug_assert(vece != MO_64);
28
+ if (vece == MO_64) {
29
+ insn = OPC_PSHIFTD_Ib | P_VEXW | P_EVEX;
30
+ } else {
31
+ insn = shift_imm_insn[vece];
32
+ }
33
sub = 4;
34
gen_shift:
35
tcg_debug_assert(vece != MO_8);
36
- insn = shift_imm_insn[vece];
37
if (type == TCG_TYPE_V256) {
38
insn |= P_VEXL;
39
}
40
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
41
return vece == MO_8 ? -1 : 1;
42
43
case INDEX_op_sari_vec:
44
- /* We must expand the operation for MO_8. */
45
- if (vece == MO_8) {
46
+ switch (vece) {
47
+ case MO_8:
48
return -1;
49
- }
50
- /* We can emulate this for MO_64, but it does not pay off
51
- unless we're producing at least 4 values. */
52
- if (vece == MO_64) {
53
+ case MO_16:
54
+ case MO_32:
55
+ return 1;
56
+ case MO_64:
57
+ if (have_avx512vl) {
58
+ return 1;
59
+ }
60
+ /*
61
+ * We can emulate this for MO_64, but it does not pay off
62
+ * unless we're producing at least 4 values.
63
+ */
64
return type >= TCG_TYPE_V256 ? -1 : 0;
65
}
66
- return 1;
67
+ return 0;
68
69
case INDEX_op_shls_vec:
70
case INDEX_op_shrs_vec:
71
--
72
2.25.1
73
74
1
AVX512VL has VPROLD and VPROLQ, layered onto the same
2
opcode as PSHIFTD, but require EVEX encoding and W1.
1
3
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/i386/tcg-target.h | 2 +-
7
tcg/i386/tcg-target.c.inc | 15 +++++++++++++--
8
2 files changed, 14 insertions(+), 3 deletions(-)
9
10
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/i386/tcg-target.h
13
+++ b/tcg/i386/tcg-target.h
14
@@ -XXX,XX +XXX,XX @@ extern bool have_movbe;
15
#define TCG_TARGET_HAS_not_vec 0
16
#define TCG_TARGET_HAS_neg_vec 0
17
#define TCG_TARGET_HAS_abs_vec 1
18
-#define TCG_TARGET_HAS_roti_vec 0
19
+#define TCG_TARGET_HAS_roti_vec have_avx512vl
20
#define TCG_TARGET_HAS_rots_vec 0
21
#define TCG_TARGET_HAS_rotv_vec 0
22
#define TCG_TARGET_HAS_shi_vec 1
23
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
24
index XXXXXXX..XXXXXXX 100644
25
--- a/tcg/i386/tcg-target.c.inc
26
+++ b/tcg/i386/tcg-target.c.inc
27
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
28
#define OPC_PSHUFLW (0x70 | P_EXT | P_SIMDF2)
29
#define OPC_PSHUFHW (0x70 | P_EXT | P_SIMDF3)
30
#define OPC_PSHIFTW_Ib (0x71 | P_EXT | P_DATA16) /* /2 /6 /4 */
31
-#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /2 /6 /4 */
32
+#define OPC_PSHIFTD_Ib (0x72 | P_EXT | P_DATA16) /* /1 /2 /6 /4 */
33
#define OPC_PSHIFTQ_Ib (0x73 | P_EXT | P_DATA16) /* /2 /6 /4 */
34
#define OPC_PSLLW (0xf1 | P_EXT | P_DATA16)
35
#define OPC_PSLLD (0xf2 | P_EXT | P_DATA16)
36
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
37
insn = shift_imm_insn[vece];
38
}
39
sub = 4;
40
+ goto gen_shift;
41
+ case INDEX_op_rotli_vec:
42
+ insn = OPC_PSHIFTD_Ib | P_EVEX; /* VPROL[DQ] */
43
+ if (vece == MO_64) {
44
+ insn |= P_VEXW;
45
+ }
46
+ sub = 1;
47
+ goto gen_shift;
48
gen_shift:
49
tcg_debug_assert(vece != MO_8);
50
if (type == TCG_TYPE_V256) {
51
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
52
case INDEX_op_shli_vec:
53
case INDEX_op_shri_vec:
54
case INDEX_op_sari_vec:
55
+ case INDEX_op_rotli_vec:
56
case INDEX_op_x86_psrldq_vec:
57
return C_O1_I1(x, x);
58
59
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
60
case INDEX_op_xor_vec:
61
case INDEX_op_andc_vec:
62
return 1;
63
- case INDEX_op_rotli_vec:
64
case INDEX_op_cmp_vec:
65
case INDEX_op_cmpsel_vec:
66
return -1;
67
68
+ case INDEX_op_rotli_vec:
69
+ return have_avx512vl && vece >= MO_32 ? 1 : -1;
70
+
71
case INDEX_op_shli_vec:
72
case INDEX_op_shri_vec:
73
/* We must expand the operation for MO_8. */
74
--
75
2.25.1
1
AVX512VL has VPROLVD/VPROLVQ and VPRORVD/VPRORVQ.
1
2
3
Tested-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/i386/tcg-target.h | 2 +-
8
tcg/i386/tcg-target.c.inc | 25 ++++++++++++++++++++++++-
9
2 files changed, 25 insertions(+), 2 deletions(-)
10
11
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.h
14
+++ b/tcg/i386/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@ extern bool have_movbe;
16
#define TCG_TARGET_HAS_abs_vec 1
17
#define TCG_TARGET_HAS_roti_vec have_avx512vl
18
#define TCG_TARGET_HAS_rots_vec 0
19
-#define TCG_TARGET_HAS_rotv_vec 0
20
+#define TCG_TARGET_HAS_rotv_vec have_avx512vl
21
#define TCG_TARGET_HAS_shi_vec 1
22
#define TCG_TARGET_HAS_shs_vec 1
23
#define TCG_TARGET_HAS_shv_vec have_avx2
24
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/i386/tcg-target.c.inc
27
+++ b/tcg/i386/tcg-target.c.inc
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
29
#define OPC_VPBROADCASTQ (0x59 | P_EXT38 | P_DATA16)
30
#define OPC_VPERMQ (0x00 | P_EXT3A | P_DATA16 | P_VEXW)
31
#define OPC_VPERM2I128 (0x46 | P_EXT3A | P_DATA16 | P_VEXL)
32
+#define OPC_VPROLVD (0x15 | P_EXT38 | P_DATA16 | P_EVEX)
33
+#define OPC_VPROLVQ (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
34
+#define OPC_VPRORVD (0x14 | P_EXT38 | P_DATA16 | P_EVEX)
35
+#define OPC_VPRORVQ (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
36
#define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
37
#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
38
#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
39
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
40
static int const umax_insn[4] = {
41
OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
42
};
43
+ static int const rotlv_insn[4] = {
44
+ OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ
45
+ };
46
+ static int const rotrv_insn[4] = {
47
+ OPC_UD2, OPC_UD2, OPC_VPRORVD, OPC_VPRORVQ
48
+ };
49
static int const shlv_insn[4] = {
50
OPC_UD2, OPC_VPSLLVW, OPC_VPSLLVD, OPC_VPSLLVQ
51
};
52
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
53
case INDEX_op_sarv_vec:
54
insn = sarv_insn[vece];
55
goto gen_simd;
56
+ case INDEX_op_rotlv_vec:
57
+ insn = rotlv_insn[vece];
58
+ goto gen_simd;
59
+ case INDEX_op_rotrv_vec:
60
+ insn = rotrv_insn[vece];
61
+ goto gen_simd;
62
case INDEX_op_shls_vec:
63
insn = shls_insn[vece];
64
goto gen_simd;
65
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
66
case INDEX_op_shlv_vec:
67
case INDEX_op_shrv_vec:
68
case INDEX_op_sarv_vec:
69
+ case INDEX_op_rotlv_vec:
70
+ case INDEX_op_rotrv_vec:
71
case INDEX_op_shls_vec:
72
case INDEX_op_shrs_vec:
73
case INDEX_op_sars_vec:
74
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
75
return 0;
76
case INDEX_op_rotlv_vec:
77
case INDEX_op_rotrv_vec:
78
- return have_avx2 && vece >= MO_32 ? -1 : 0;
79
+ switch (vece) {
80
+ case MO_32:
81
+ case MO_64:
82
+ return have_avx512vl ? 1 : have_avx2 ? -1 : 0;
83
+ }
84
+ return 0;
85
86
case INDEX_op_mul_vec:
87
if (vece == MO_8) {
88
--
89
2.25.1
90
91
1
We will use VPSHLD, VPSHLDV and VPSHRDV for 16-bit rotates.
1
2
3
Tested-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/i386/tcg-target-con-set.h | 1 +
8
tcg/i386/tcg-target.opc.h | 3 +++
9
tcg/i386/tcg-target.c.inc | 38 +++++++++++++++++++++++++++++++++++
10
3 files changed, 42 insertions(+)
11
12
diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/i386/tcg-target-con-set.h
15
+++ b/tcg/i386/tcg-target-con-set.h
16
@@ -XXX,XX +XXX,XX @@ C_O1_I2(r, r, rI)
17
C_O1_I2(x, x, x)
18
C_N1_I2(r, r, r)
19
C_N1_I2(r, r, rW)
20
+C_O1_I3(x, 0, x, x)
21
C_O1_I3(x, x, x, x)
22
C_O1_I4(r, r, re, r, 0)
23
C_O1_I4(r, r, r, ri, ri)
24
diff --git a/tcg/i386/tcg-target.opc.h b/tcg/i386/tcg-target.opc.h
25
index XXXXXXX..XXXXXXX 100644
26
--- a/tcg/i386/tcg-target.opc.h
27
+++ b/tcg/i386/tcg-target.opc.h
28
@@ -XXX,XX +XXX,XX @@ DEF(x86_psrldq_vec, 1, 1, 1, IMPLVEC)
29
DEF(x86_vperm2i128_vec, 1, 2, 1, IMPLVEC)
30
DEF(x86_punpckl_vec, 1, 2, 0, IMPLVEC)
31
DEF(x86_punpckh_vec, 1, 2, 0, IMPLVEC)
32
+DEF(x86_vpshldi_vec, 1, 2, 1, IMPLVEC)
33
+DEF(x86_vpshldv_vec, 1, 3, 0, IMPLVEC)
34
+DEF(x86_vpshrdv_vec, 1, 3, 0, IMPLVEC)
35
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
36
index XXXXXXX..XXXXXXX 100644
37
--- a/tcg/i386/tcg-target.c.inc
38
+++ b/tcg/i386/tcg-target.c.inc
39
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
40
#define OPC_VPROLVQ (0x15 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
41
#define OPC_VPRORVD (0x14 | P_EXT38 | P_DATA16 | P_EVEX)
42
#define OPC_VPRORVQ (0x14 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
43
+#define OPC_VPSHLDW (0x70 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
44
+#define OPC_VPSHLDD (0x71 | P_EXT3A | P_DATA16 | P_EVEX)
45
+#define OPC_VPSHLDQ (0x71 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
46
+#define OPC_VPSHLDVW (0x70 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
47
+#define OPC_VPSHLDVD (0x71 | P_EXT38 | P_DATA16 | P_EVEX)
48
+#define OPC_VPSHLDVQ (0x71 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
49
+#define OPC_VPSHRDVW (0x72 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
50
+#define OPC_VPSHRDVD (0x73 | P_EXT38 | P_DATA16 | P_EVEX)
51
+#define OPC_VPSHRDVQ (0x73 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
52
#define OPC_VPSLLVW (0x12 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
53
#define OPC_VPSLLVD (0x47 | P_EXT38 | P_DATA16)
54
#define OPC_VPSLLVQ (0x47 | P_EXT38 | P_DATA16 | P_VEXW)
55
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
56
static int const sars_insn[4] = {
57
OPC_UD2, OPC_PSRAW, OPC_PSRAD, OPC_VPSRAQ
58
};
59
+ static int const vpshldi_insn[4] = {
60
+ OPC_UD2, OPC_VPSHLDW, OPC_VPSHLDD, OPC_VPSHLDQ
61
+ };
62
+ static int const vpshldv_insn[4] = {
63
+ OPC_UD2, OPC_VPSHLDVW, OPC_VPSHLDVD, OPC_VPSHLDVQ
64
+ };
65
+ static int const vpshrdv_insn[4] = {
66
+ OPC_UD2, OPC_VPSHRDVW, OPC_VPSHRDVD, OPC_VPSHRDVQ
67
+ };
68
static int const abs_insn[4] = {
69
/* TODO: AVX512 adds support for MO_64. */
70
OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2
71
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
72
case INDEX_op_x86_packus_vec:
73
insn = packus_insn[vece];
74
goto gen_simd;
75
+ case INDEX_op_x86_vpshldv_vec:
76
+ insn = vpshldv_insn[vece];
77
+ a1 = a2;
78
+ a2 = args[3];
79
+ goto gen_simd;
80
+ case INDEX_op_x86_vpshrdv_vec:
81
+ insn = vpshrdv_insn[vece];
82
+ a1 = a2;
83
+ a2 = args[3];
84
+ goto gen_simd;
85
#if TCG_TARGET_REG_BITS == 32
86
case INDEX_op_dup2_vec:
87
/* First merge the two 32-bit inputs to a single 64-bit element. */
88
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
89
insn = OPC_VPERM2I128;
90
sub = args[3];
91
goto gen_simd_imm8;
92
+ case INDEX_op_x86_vpshldi_vec:
93
+ insn = vpshldi_insn[vece];
94
+ sub = args[3];
95
+ goto gen_simd_imm8;
96
gen_simd_imm8:
97
+ tcg_debug_assert(insn != OPC_UD2);
98
if (type == TCG_TYPE_V256) {
99
insn |= P_VEXL;
100
}
101
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
102
case INDEX_op_x86_vperm2i128_vec:
103
case INDEX_op_x86_punpckl_vec:
104
case INDEX_op_x86_punpckh_vec:
105
+ case INDEX_op_x86_vpshldi_vec:
106
#if TCG_TARGET_REG_BITS == 32
107
case INDEX_op_dup2_vec:
108
#endif
109
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
110
case INDEX_op_x86_psrldq_vec:
111
return C_O1_I1(x, x);
112
113
+ case INDEX_op_x86_vpshldv_vec:
114
+ case INDEX_op_x86_vpshrdv_vec:
115
+ return C_O1_I3(x, 0, x, x);
116
+
117
case INDEX_op_x86_vpblendvb_vec:
118
return C_O1_I3(x, x, x, x);
119
120
--
121
2.25.1
122
123
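The shift-double operations introduced above are what later patches in the series use to synthesize 16-bit rotates. The identity, shown as a scalar model (illustrative only, not QEMU code): rotating a 16-bit value left by n is a double-width shift of the value concatenated with itself, which is what a VPSHLDW-style shift-double computes per lane.

    #include <stdint.h>

    static uint16_t rol16_via_shift_double(uint16_t x, unsigned n)
    {
        uint32_t doubled = ((uint32_t)x << 16) | x;     /* x:x */
        n &= 15;
        return (uint16_t)(doubled >> (16 - n));
    }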
No functional change, but the smaller expressions make
1
While there are no specific 16-bit rotate instructions, there
2
the code easier to read.
2
are double-word shifts, which can perform the same operation.
3
3
4
Tested-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
accel/tcg/cputlb.c | 35 +++++++++++++++++------------------
8
tcg/i386/tcg-target.c.inc | 18 +++++++++++++++++-
10
1 file changed, 17 insertions(+), 18 deletions(-)
9
1 file changed, 17 insertions(+), 1 deletion(-)
11
10
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
13
--- a/tcg/i386/tcg-target.c.inc
15
+++ b/accel/tcg/cputlb.c
14
+++ b/tcg/i386/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
15
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
17
16
case INDEX_op_rotlv_vec:
18
/**
17
case INDEX_op_rotrv_vec:
19
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
18
switch (vece) {
20
- * @env: CPU that owns the TLB
19
+ case MO_16:
21
- * @mmu_idx: MMU index of the TLB
20
+ return have_avx512vbmi2 ? -1 : 0;
22
+ * @desc: The CPUTLBDesc portion of the TLB
21
case MO_32:
23
+ * @fast: The CPUTLBDescFast portion of the same TLB
22
case MO_64:
24
*
23
return have_avx512vl ? 1 : have_avx2 ? -1 : 0;
25
* Called with tlb_lock_held.
24
@@ -XXX,XX +XXX,XX @@ static void expand_vec_rotli(TCGType type, unsigned vece,
26
*
27
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
28
* high), since otherwise we are likely to have a significant amount of
29
* conflict misses.
30
*/
31
-static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
32
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
33
{
34
- CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
35
- size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
36
+ size_t old_size = tlb_n_entries(fast);
37
size_t rate;
38
size_t new_size = old_size;
39
int64_t now = get_clock_realtime();
40
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
41
return;
25
return;
42
}
26
}
43
27
44
- g_free(env_tlb(env)->f[mmu_idx].table);
28
+ if (have_avx512vbmi2) {
45
- g_free(env_tlb(env)->d[mmu_idx].iotlb);
29
+ vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece,
46
+ g_free(fast->table);
30
+ tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm);
47
+ g_free(desc->iotlb);
31
+ return;
48
32
+ }
49
tlb_window_reset(desc, now, 0);
50
/* desc->n_used_entries is cleared by the caller */
51
- env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
52
- env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
53
- env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
54
+ fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
55
+ fast->table = g_try_new(CPUTLBEntry, new_size);
56
+ desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
57
+
33
+
58
/*
34
t = tcg_temp_new_vec(type);
59
* If the allocations fail, try smaller sizes. We just freed some
35
tcg_gen_shli_vec(vece, t, v1, imm);
60
* memory, so going back to half of new_size has a good chance of working.
36
tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm);
61
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
37
@@ -XXX,XX +XXX,XX @@ static void expand_vec_rotls(TCGType type, unsigned vece,
62
* allocations to fail though, so we progressively reduce the allocation
38
static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
63
* size, aborting if we cannot even allocate the smallest TLB we support.
39
TCGv_vec v1, TCGv_vec sh, bool right)
64
*/
65
- while (env_tlb(env)->f[mmu_idx].table == NULL ||
66
- env_tlb(env)->d[mmu_idx].iotlb == NULL) {
67
+ while (fast->table == NULL || desc->iotlb == NULL) {
68
if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
69
error_report("%s: %s", __func__, strerror(errno));
70
abort();
71
}
72
new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
73
- env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
74
+ fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
75
76
- g_free(env_tlb(env)->f[mmu_idx].table);
77
- g_free(env_tlb(env)->d[mmu_idx].iotlb);
78
- env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
79
- env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
80
+ g_free(fast->table);
81
+ g_free(desc->iotlb);
82
+ fast->table = g_try_new(CPUTLBEntry, new_size);
83
+ desc->iotlb = g_try_new(CPUIOTLBEntry, new_size);
84
}
85
}
86
87
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
88
{
40
{
89
- tlb_mmu_resize_locked(env, mmu_idx);
41
- TCGv_vec t = tcg_temp_new_vec(type);
90
+ tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
42
+ TCGv_vec t;
91
env_tlb(env)->d[mmu_idx].n_used_entries = 0;
43
92
env_tlb(env)->d[mmu_idx].large_page_addr = -1;
44
+ if (have_avx512vbmi2) {
93
env_tlb(env)->d[mmu_idx].large_page_mask = -1;
45
+ vec_gen_4(right ? INDEX_op_x86_vpshrdv_vec : INDEX_op_x86_vpshldv_vec,
46
+ type, vece, tcgv_vec_arg(v0), tcgv_vec_arg(v1),
47
+ tcgv_vec_arg(v1), tcgv_vec_arg(sh));
48
+ return;
49
+ }
50
+
51
+ t = tcg_temp_new_vec(type);
52
tcg_gen_dupi_vec(vece, t, 8 << vece);
53
tcg_gen_sub_vec(vece, t, t, sh);
54
if (right) {
94
--
55
--
95
2.20.1
56
2.25.1
96
57
97
58
New patch
1
There is no such instruction on x86, so we should
2
not be pretending it has arguments.
1
3
4
Tested-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/i386/tcg-target.c.inc | 1 -
9
1 file changed, 1 deletion(-)
10
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
16
case INDEX_op_shls_vec:
17
case INDEX_op_shrs_vec:
18
case INDEX_op_sars_vec:
19
- case INDEX_op_rotls_vec:
20
case INDEX_op_cmp_vec:
21
case INDEX_op_x86_shufps_vec:
22
case INDEX_op_x86_blend_vec:
23
--
24
2.25.1
25
26
1
Merge into the only caller, but at the same time split
1
Expand 32-bit and 64-bit scalar rotate with VPRO[LR]V;
2
out tlb_mmu_init to initialize a single tlb entry.
2
expand 16-bit scalar rotate with VPSHLDV.
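For reference, the non-AVX512 fallback relies on the standard rotate-by-scalar identity; a minimal one-lane C sketch of that identity (the helper name is illustrative, not part of the patch):

    #include <stdint.h>

    /*
     * rotl(x, s) == (x << s) | (x >> (-s & (width - 1))); the vector
     * expansion obtains the right-shift count the same way, with a
     * negate followed by an AND against (8 << vece) - 1.
     */
    static inline uint32_t rotl32_sketch(uint32_t x, unsigned s)
    {
        return (x << (s & 31)) | (x >> (-s & 31));
    }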
3
3
4
Tested-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
accel/tcg/cputlb.c | 33 ++++++++++++++++-----------------
8
tcg/i386/tcg-target.c.inc | 49 +++++++++++++++++++++++----------------
10
1 file changed, 16 insertions(+), 17 deletions(-)
9
1 file changed, 29 insertions(+), 20 deletions(-)
11
10
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
13
--- a/tcg/i386/tcg-target.c.inc
15
+++ b/accel/tcg/cputlb.c
14
+++ b/tcg/i386/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
15
@@ -XXX,XX +XXX,XX @@ static void expand_vec_rotli(TCGType type, unsigned vece,
17
desc->window_max_entries = max_entries;
16
tcg_temp_free_vec(t);
18
}
17
}
19
18
20
-static void tlb_dyn_init(CPUArchState *env)
19
-static void expand_vec_rotls(TCGType type, unsigned vece,
20
- TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
21
-{
21
-{
22
- int i;
22
- TCGv_i32 rsh;
23
- TCGv_vec t;
23
-
24
-
24
- for (i = 0; i < NB_MMU_MODES; i++) {
25
- tcg_debug_assert(vece != MO_8);
25
- CPUTLBDesc *desc = &env_tlb(env)->d[i];
26
- size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
27
-
26
-
28
- tlb_window_reset(desc, get_clock_realtime(), 0);
27
- t = tcg_temp_new_vec(type);
29
- desc->n_used_entries = 0;
28
- rsh = tcg_temp_new_i32();
30
- env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
29
-
31
- env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
30
- tcg_gen_neg_i32(rsh, lsh);
32
- env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
31
- tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
33
- }
32
- tcg_gen_shls_vec(vece, t, v1, lsh);
33
- tcg_gen_shrs_vec(vece, v0, v1, rsh);
34
- tcg_gen_or_vec(vece, v0, v0, t);
35
- tcg_temp_free_vec(t);
36
- tcg_temp_free_i32(rsh);
34
-}
37
-}
35
-
38
-
36
/**
39
static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
37
* tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
40
TCGv_vec v1, TCGv_vec sh, bool right)
38
* @desc: The CPUTLBDesc portion of the TLB
41
{
39
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
42
@@ -XXX,XX +XXX,XX @@ static void expand_vec_rotv(TCGType type, unsigned vece, TCGv_vec v0,
40
tlb_mmu_flush_locked(desc, fast);
43
tcg_temp_free_vec(t);
41
}
44
}
42
45
43
+static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
46
+static void expand_vec_rotls(TCGType type, unsigned vece,
47
+ TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
44
+{
48
+{
45
+ size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
49
+ TCGv_vec t = tcg_temp_new_vec(type);
46
+
50
+
47
+ tlb_window_reset(desc, now, 0);
51
+ tcg_debug_assert(vece != MO_8);
48
+ desc->n_used_entries = 0;
52
+
49
+ fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
53
+ if (vece >= MO_32 ? have_avx512vl : have_avx512vbmi2) {
50
+ fast->table = g_new(CPUTLBEntry, n_entries);
54
+ tcg_gen_dup_i32_vec(vece, t, lsh);
51
+ desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
55
+ if (vece >= MO_32) {
56
+ tcg_gen_rotlv_vec(vece, v0, v1, t);
57
+ } else {
58
+ expand_vec_rotv(type, vece, v0, v1, t, false);
59
+ }
60
+ } else {
61
+ TCGv_i32 rsh = tcg_temp_new_i32();
62
+
63
+ tcg_gen_neg_i32(rsh, lsh);
64
+ tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
65
+ tcg_gen_shls_vec(vece, t, v1, lsh);
66
+ tcg_gen_shrs_vec(vece, v0, v1, rsh);
67
+ tcg_gen_or_vec(vece, v0, v0, t);
68
+
69
+ tcg_temp_free_i32(rsh);
70
+ }
71
+
72
+ tcg_temp_free_vec(t);
52
+}
73
+}
53
+
74
+
54
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
75
static void expand_vec_mul(TCGType type, unsigned vece,
76
TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
55
{
77
{
56
env_tlb(env)->d[mmu_idx].n_used_entries++;
57
@@ -XXX,XX +XXX,XX @@ static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
58
void tlb_init(CPUState *cpu)
59
{
60
CPUArchState *env = cpu->env_ptr;
61
+ int64_t now = get_clock_realtime();
62
+ int i;
63
64
qemu_spin_init(&env_tlb(env)->c.lock);
65
66
/* Ensure that cpu_reset performs a full flush. */
67
env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
68
69
- tlb_dyn_init(env);
70
+ for (i = 0; i < NB_MMU_MODES; i++) {
71
+ tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
72
+ }
73
}
74
75
/* flush_all_helper: run fn across all cpus
76
--
78
--
77
2.20.1
79
2.25.1
78
80
79
81
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
AVX512VL has VPABSQ, VPMAXSQ, VPMAXUQ, VPMINSQ, VPMINUQ.
2
2
3
To avoid scrolling each instruction when reviewing tcg
3
Tested-by: Alex Bennée <alex.bennee@linaro.org>
4
helpers written for the decodetree script, display the
5
.decode files (similar to header declarations) before
6
the C source (implementation of previous declarations).
7
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
11
Message-Id: <20191230082856.30556-1-philmd@redhat.com>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
6
---
14
scripts/git.orderfile | 3 +++
7
tcg/i386/tcg-target.c.inc | 18 +++++++++++-------
15
1 file changed, 3 insertions(+)
8
1 file changed, 11 insertions(+), 7 deletions(-)
16
9
17
diff --git a/scripts/git.orderfile b/scripts/git.orderfile
10
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
18
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
19
--- a/scripts/git.orderfile
12
--- a/tcg/i386/tcg-target.c.inc
20
+++ b/scripts/git.orderfile
13
+++ b/tcg/i386/tcg-target.c.inc
21
@@ -XXX,XX +XXX,XX @@ qga/*.json
14
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
22
# headers
15
#define OPC_PABSB (0x1c | P_EXT38 | P_DATA16)
23
*.h
16
#define OPC_PABSW (0x1d | P_EXT38 | P_DATA16)
24
17
#define OPC_PABSD (0x1e | P_EXT38 | P_DATA16)
25
+# decoding tree specification
18
+#define OPC_VPABSQ (0x1f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
26
+*.decode
19
#define OPC_PACKSSDW (0x6b | P_EXT | P_DATA16)
27
+
20
#define OPC_PACKSSWB (0x63 | P_EXT | P_DATA16)
28
# code
21
#define OPC_PACKUSDW (0x2b | P_EXT38 | P_DATA16)
29
*.c
22
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
23
#define OPC_PMAXSB (0x3c | P_EXT38 | P_DATA16)
24
#define OPC_PMAXSW (0xee | P_EXT | P_DATA16)
25
#define OPC_PMAXSD (0x3d | P_EXT38 | P_DATA16)
26
+#define OPC_VPMAXSQ (0x3d | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
27
#define OPC_PMAXUB (0xde | P_EXT | P_DATA16)
28
#define OPC_PMAXUW (0x3e | P_EXT38 | P_DATA16)
29
#define OPC_PMAXUD (0x3f | P_EXT38 | P_DATA16)
30
+#define OPC_VPMAXUQ (0x3f | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
31
#define OPC_PMINSB (0x38 | P_EXT38 | P_DATA16)
32
#define OPC_PMINSW (0xea | P_EXT | P_DATA16)
33
#define OPC_PMINSD (0x39 | P_EXT38 | P_DATA16)
34
+#define OPC_VPMINSQ (0x39 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
35
#define OPC_PMINUB (0xda | P_EXT | P_DATA16)
36
#define OPC_PMINUW (0x3a | P_EXT38 | P_DATA16)
37
#define OPC_PMINUD (0x3b | P_EXT38 | P_DATA16)
38
+#define OPC_VPMINUQ (0x3b | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
39
#define OPC_PMOVSXBW (0x20 | P_EXT38 | P_DATA16)
40
#define OPC_PMOVSXWD (0x23 | P_EXT38 | P_DATA16)
41
#define OPC_PMOVSXDQ (0x25 | P_EXT38 | P_DATA16)
42
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
43
OPC_PACKUSWB, OPC_PACKUSDW, OPC_UD2, OPC_UD2
44
};
45
static int const smin_insn[4] = {
46
- OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_UD2
47
+ OPC_PMINSB, OPC_PMINSW, OPC_PMINSD, OPC_VPMINSQ
48
};
49
static int const smax_insn[4] = {
50
- OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_UD2
51
+ OPC_PMAXSB, OPC_PMAXSW, OPC_PMAXSD, OPC_VPMAXSQ
52
};
53
static int const umin_insn[4] = {
54
- OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_UD2
55
+ OPC_PMINUB, OPC_PMINUW, OPC_PMINUD, OPC_VPMINUQ
56
};
57
static int const umax_insn[4] = {
58
- OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_UD2
59
+ OPC_PMAXUB, OPC_PMAXUW, OPC_PMAXUD, OPC_VPMAXUQ
60
};
61
static int const rotlv_insn[4] = {
62
OPC_UD2, OPC_UD2, OPC_VPROLVD, OPC_VPROLVQ
63
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
64
OPC_UD2, OPC_VPSHRDVW, OPC_VPSHRDVD, OPC_VPSHRDVQ
65
};
66
static int const abs_insn[4] = {
67
- /* TODO: AVX512 adds support for MO_64. */
68
- OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_UD2
69
+ OPC_PABSB, OPC_PABSW, OPC_PABSD, OPC_VPABSQ
70
};
71
72
TCGType type = vecl + TCG_TYPE_V64;
73
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
74
case INDEX_op_umin_vec:
75
case INDEX_op_umax_vec:
76
case INDEX_op_abs_vec:
77
- return vece <= MO_32;
78
+ return vece <= MO_32 || have_avx512vl;
79
80
default:
81
return 0;
30
--
82
--
31
2.20.1
83
2.25.1
32
84
33
85
1
No functional change, but the smaller expressions make
1
AVX512DQ has VPMULLQ.
2
the code easier to read.
3
2
3
Tested-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
accel/tcg/cputlb.c | 19 ++++++++++---------
7
tcg/i386/tcg-target.c.inc | 12 ++++++------
10
1 file changed, 10 insertions(+), 9 deletions(-)
8
1 file changed, 6 insertions(+), 6 deletions(-)
11
9
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
10
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
12
--- a/tcg/i386/tcg-target.c.inc
15
+++ b/accel/tcg/cputlb.c
13
+++ b/tcg/i386/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
14
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
17
15
#define OPC_PMOVZXDQ (0x35 | P_EXT38 | P_DATA16)
18
static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
16
#define OPC_PMULLW (0xd5 | P_EXT | P_DATA16)
19
{
17
#define OPC_PMULLD (0x40 | P_EXT38 | P_DATA16)
20
- tlb_mmu_resize_locked(&env_tlb(env)->d[mmu_idx], &env_tlb(env)->f[mmu_idx]);
18
+#define OPC_VPMULLQ (0x40 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
21
- env_tlb(env)->d[mmu_idx].n_used_entries = 0;
19
#define OPC_POR (0xeb | P_EXT | P_DATA16)
22
- env_tlb(env)->d[mmu_idx].large_page_addr = -1;
20
#define OPC_PSHUFB (0x00 | P_EXT38 | P_DATA16)
23
- env_tlb(env)->d[mmu_idx].large_page_mask = -1;
21
#define OPC_PSHUFD (0x70 | P_EXT | P_DATA16)
24
- env_tlb(env)->d[mmu_idx].vindex = 0;
22
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
25
- memset(env_tlb(env)->f[mmu_idx].table, -1,
23
OPC_PSUBUB, OPC_PSUBUW, OPC_UD2, OPC_UD2
26
- sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
24
};
27
- memset(env_tlb(env)->d[mmu_idx].vtable, -1,
25
static int const mul_insn[4] = {
28
- sizeof(env_tlb(env)->d[0].vtable));
26
- OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_UD2
29
+ CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
27
+ OPC_UD2, OPC_PMULLW, OPC_PMULLD, OPC_VPMULLQ
30
+ CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
28
};
31
+
29
static int const shift_imm_insn[4] = {
32
+ tlb_mmu_resize_locked(desc, fast);
30
OPC_UD2, OPC_PSHIFTW_Ib, OPC_PSHIFTD_Ib, OPC_PSHIFTQ_Ib
33
+ desc->n_used_entries = 0;
31
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
34
+ desc->large_page_addr = -1;
32
return 0;
35
+ desc->large_page_mask = -1;
33
36
+ desc->vindex = 0;
34
case INDEX_op_mul_vec:
37
+ memset(fast->table, -1, sizeof_tlb(fast));
35
- if (vece == MO_8) {
38
+ memset(desc->vtable, -1, sizeof(desc->vtable));
36
- /* We can expand the operation for MO_8. */
39
}
37
+ switch (vece) {
40
38
+ case MO_8:
41
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
39
return -1;
40
- }
41
- if (vece == MO_64) {
42
- return 0;
43
+ case MO_64:
44
+ return have_avx512dq;
45
}
46
return 1;
47
42
--
48
--
43
2.20.1
49
2.25.1
44
50
45
51
1
We do not need the entire CPUArchState to compute these values.
1
AVX512VL has a general ternary logic operation, VPTERNLOGQ,
2
which can implement NOT, ORC, NAND, NOR, EQV.
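As background for the immediates used in this patch (0x33, 0x11, 0x77, 0x99, 0xdd), here is a hedged, standalone C sketch of how a ternary-logic immediate encodes a boolean function; the helper and the self-test are mine, not part of the patch:

    #include <assert.h>
    #include <stdint.h>

    /*
     * The result bit for operand bits (a, b, c) is bit ((a << 2) | (b << 1) | c)
     * of the 8-bit immediate.  The five immediates below depend only on B and C,
     * so the value of A does not matter.
     */
    static int ternlog_bit(uint8_t imm, int a, int b, int c)
    {
        return (imm >> ((a << 2) | (b << 1) | c)) & 1;
    }

    int main(void)
    {
        for (int a = 0; a < 2; a++) {
            for (int b = 0; b < 2; b++) {
                for (int c = 0; c < 2; c++) {
                    assert(ternlog_bit(0x33, a, b, c) == !b);        /* not B */
                    assert(ternlog_bit(0x11, a, b, c) == !(b | c));  /* nor   */
                    assert(ternlog_bit(0x77, a, b, c) == !(b & c));  /* nand  */
                    assert(ternlog_bit(0x99, a, b, c) == (b == c));  /* eqv   */
                    assert(ternlog_bit(0xdd, a, b, c) == (b | !c));  /* orc   */
                }
            }
        }
        return 0;
    }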
2
3
4
Tested-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
accel/tcg/cputlb.c | 15 ++++++++-------
8
tcg/i386/tcg-target.h | 10 +++++-----
9
1 file changed, 8 insertions(+), 7 deletions(-)
9
tcg/i386/tcg-target.c.inc | 34 ++++++++++++++++++++++++++++++++++
10
2 files changed, 39 insertions(+), 5 deletions(-)
10
11
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/accel/tcg/cputlb.c
14
--- a/tcg/i386/tcg-target.h
14
+++ b/accel/tcg/cputlb.c
15
+++ b/tcg/i386/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
16
@@ -XXX,XX +XXX,XX @@ extern bool have_movbe;
16
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
17
#define TCG_TARGET_HAS_v256 have_avx2
17
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
18
18
19
#define TCG_TARGET_HAS_andc_vec 1
19
-static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
20
-#define TCG_TARGET_HAS_orc_vec 0
20
+static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
21
-#define TCG_TARGET_HAS_nand_vec 0
21
{
22
-#define TCG_TARGET_HAS_nor_vec 0
22
- return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
23
-#define TCG_TARGET_HAS_eqv_vec 0
23
+ return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
24
-#define TCG_TARGET_HAS_not_vec 0
24
}
25
+#define TCG_TARGET_HAS_orc_vec have_avx512vl
25
26
+#define TCG_TARGET_HAS_nand_vec have_avx512vl
26
-static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
27
+#define TCG_TARGET_HAS_nor_vec have_avx512vl
27
+static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
28
+#define TCG_TARGET_HAS_eqv_vec have_avx512vl
28
{
29
+#define TCG_TARGET_HAS_not_vec have_avx512vl
29
- return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
30
#define TCG_TARGET_HAS_neg_vec 0
30
+ return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
31
#define TCG_TARGET_HAS_abs_vec 1
31
}
32
#define TCG_TARGET_HAS_roti_vec have_avx512vl
32
33
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
33
static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
34
index XXXXXXX..XXXXXXX 100644
34
@@ -XXX,XX +XXX,XX @@ static void tlb_dyn_init(CPUArchState *env)
35
--- a/tcg/i386/tcg-target.c.inc
35
static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
36
+++ b/tcg/i386/tcg-target.c.inc
36
{
37
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
37
CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
38
#define OPC_VPSRLVW (0x10 | P_EXT38 | P_DATA16 | P_VEXW | P_EVEX)
38
- size_t old_size = tlb_n_entries(env, mmu_idx);
39
#define OPC_VPSRLVD (0x45 | P_EXT38 | P_DATA16)
39
+ size_t old_size = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
40
#define OPC_VPSRLVQ (0x45 | P_EXT38 | P_DATA16 | P_VEXW)
40
size_t rate;
41
+#define OPC_VPTERNLOGQ (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
41
size_t new_size = old_size;
42
#define OPC_VZEROUPPER (0x77 | P_EXT)
42
int64_t now = get_clock_realtime();
43
#define OPC_XCHG_ax_r32    (0x90)
43
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
44
44
env_tlb(env)->d[mmu_idx].large_page_addr = -1;
45
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
45
env_tlb(env)->d[mmu_idx].large_page_mask = -1;
46
insn = vpshldi_insn[vece];
46
env_tlb(env)->d[mmu_idx].vindex = 0;
47
sub = args[3];
47
- memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
48
goto gen_simd_imm8;
48
+ memset(env_tlb(env)->f[mmu_idx].table, -1,
49
+
49
+ sizeof_tlb(&env_tlb(env)->f[mmu_idx]));
50
+ case INDEX_op_not_vec:
50
memset(env_tlb(env)->d[mmu_idx].vtable, -1,
51
+ insn = OPC_VPTERNLOGQ;
51
sizeof(env_tlb(env)->d[0].vtable));
52
+ a2 = a1;
52
}
53
+ sub = 0x33; /* !B */
53
@@ -XXX,XX +XXX,XX @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
54
+ goto gen_simd_imm8;
54
qemu_spin_lock(&env_tlb(env)->c.lock);
55
+ case INDEX_op_nor_vec:
55
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
56
+ insn = OPC_VPTERNLOGQ;
56
unsigned int i;
57
+ sub = 0x11; /* norCB */
57
- unsigned int n = tlb_n_entries(env, mmu_idx);
58
+ goto gen_simd_imm8;
58
+ unsigned int n = tlb_n_entries(&env_tlb(env)->f[mmu_idx]);
59
+ case INDEX_op_nand_vec:
59
60
+ insn = OPC_VPTERNLOGQ;
60
for (i = 0; i < n; i++) {
61
+ sub = 0x77; /* nandCB */
61
tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
62
+ goto gen_simd_imm8;
63
+ case INDEX_op_eqv_vec:
64
+ insn = OPC_VPTERNLOGQ;
65
+ sub = 0x99; /* xnorCB */
66
+ goto gen_simd_imm8;
67
+ case INDEX_op_orc_vec:
68
+ insn = OPC_VPTERNLOGQ;
69
+ sub = 0xdd; /* orB!C */
70
+ goto gen_simd_imm8;
71
+
72
gen_simd_imm8:
73
tcg_debug_assert(insn != OPC_UD2);
74
if (type == TCG_TYPE_V256) {
75
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
76
case INDEX_op_or_vec:
77
case INDEX_op_xor_vec:
78
case INDEX_op_andc_vec:
79
+ case INDEX_op_orc_vec:
80
+ case INDEX_op_nand_vec:
81
+ case INDEX_op_nor_vec:
82
+ case INDEX_op_eqv_vec:
83
case INDEX_op_ssadd_vec:
84
case INDEX_op_usadd_vec:
85
case INDEX_op_sssub_vec:
86
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
87
88
case INDEX_op_abs_vec:
89
case INDEX_op_dup_vec:
90
+ case INDEX_op_not_vec:
91
case INDEX_op_shli_vec:
92
case INDEX_op_shri_vec:
93
case INDEX_op_sari_vec:
94
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
95
case INDEX_op_or_vec:
96
case INDEX_op_xor_vec:
97
case INDEX_op_andc_vec:
98
+ case INDEX_op_orc_vec:
99
+ case INDEX_op_nand_vec:
100
+ case INDEX_op_nor_vec:
101
+ case INDEX_op_eqv_vec:
102
+ case INDEX_op_not_vec:
103
return 1;
104
case INDEX_op_cmp_vec:
105
case INDEX_op_cmpsel_vec:
62
--
106
--
63
2.20.1
107
2.25.1
64
108
65
109
1
The result of g_strsplit is never NULL.
1
The general ternary logic operation can implement BITSEL.
2
Funnel the 4-operand operation into three variants of the
3
3-operand instruction, depending on input operand overlap.
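A hedged note on the three immediates chosen in the code below (0xca, 0xe2, 0xb8): bitsel computes (sel & t) | (~sel & f), and the immediate simply records which ternary operand ends up holding the selector after the operand shuffling. Illustrative C, not part of the patch:

    #include <stdint.h>

    /* Reference semantics of bitsel for one 64-bit lane. */
    static inline uint64_t bitsel64(uint64_t sel, uint64_t t, uint64_t f)
    {
        return (sel & t) | (~sel & f);
    }

    /*
     * With result-bit index (a << 2) | (b << 1) | c:
     *   0xca : a ? b : c  -- the selector already sits in the destination
     *   0xe2 : b ? a : c  -- the destination aliases the "true" input
     *   0xb8 : b ? c : a  -- the "false" input is first copied into the destination
     */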
2
4
3
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
5
Tested-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
vl.c | 2 +-
9
tcg/i386/tcg-target.h | 2 +-
10
1 file changed, 1 insertion(+), 1 deletion(-)
10
tcg/i386/tcg-target.c.inc | 20 +++++++++++++++++++-
11
2 files changed, 20 insertions(+), 2 deletions(-)
11
12
12
diff --git a/vl.c b/vl.c
13
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/vl.c
15
--- a/tcg/i386/tcg-target.h
15
+++ b/vl.c
16
+++ b/tcg/i386/tcg-target.h
16
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
17
@@ -XXX,XX +XXX,XX @@ extern bool have_movbe;
17
18
#define TCG_TARGET_HAS_mul_vec 1
18
accel_list = g_strsplit(accel, ":", 0);
19
#define TCG_TARGET_HAS_sat_vec 1
19
20
#define TCG_TARGET_HAS_minmax_vec 1
20
- for (tmp = accel_list; tmp && *tmp; tmp++) {
21
-#define TCG_TARGET_HAS_bitsel_vec 0
21
+ for (tmp = accel_list; *tmp; tmp++) {
22
+#define TCG_TARGET_HAS_bitsel_vec have_avx512vl
22
/*
23
#define TCG_TARGET_HAS_cmpsel_vec -1
23
* Filter invalid accelerators here, to prevent obscenities
24
24
* such as "-machine accel=tcg,,thread=single".
25
#define TCG_TARGET_deposit_i32_valid(ofs, len) \
26
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
27
index XXXXXXX..XXXXXXX 100644
28
--- a/tcg/i386/tcg-target.c.inc
29
+++ b/tcg/i386/tcg-target.c.inc
30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
31
32
TCGType type = vecl + TCG_TYPE_V64;
33
int insn, sub;
34
- TCGArg a0, a1, a2;
35
+ TCGArg a0, a1, a2, a3;
36
37
a0 = args[0];
38
a1 = args[1];
39
@@ -XXX,XX +XXX,XX @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
40
sub = 0xdd; /* orB!C */
41
goto gen_simd_imm8;
42
43
+ case INDEX_op_bitsel_vec:
44
+ insn = OPC_VPTERNLOGQ;
45
+ a3 = args[3];
46
+ if (a0 == a1) {
47
+ a1 = a2;
48
+ a2 = a3;
49
+ sub = 0xca; /* A?B:C */
50
+ } else if (a0 == a2) {
51
+ a2 = a3;
52
+ sub = 0xe2; /* B?A:C */
53
+ } else {
54
+ tcg_out_mov(s, type, a0, a3);
55
+ sub = 0xb8; /* B?C:A */
56
+ }
57
+ goto gen_simd_imm8;
58
+
59
gen_simd_imm8:
60
tcg_debug_assert(insn != OPC_UD2);
61
if (type == TCG_TYPE_V256) {
62
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
63
case INDEX_op_x86_vpshrdv_vec:
64
return C_O1_I3(x, 0, x, x);
65
66
+ case INDEX_op_bitsel_vec:
67
case INDEX_op_x86_vpblendvb_vec:
68
return C_O1_I3(x, x, x, x);
69
70
@@ -XXX,XX +XXX,XX @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
71
case INDEX_op_nor_vec:
72
case INDEX_op_eqv_vec:
73
case INDEX_op_not_vec:
74
+ case INDEX_op_bitsel_vec:
75
return 1;
76
case INDEX_op_cmp_vec:
77
case INDEX_op_cmpsel_vec:
25
--
78
--
26
2.20.1
79
2.25.1
27
80
28
81
1
The accel_list and tmp variables are only used when manufacturing
1
Define as 0 for all tcg hosts. Put this in a separate header,
2
-machine accel, options based on -accel.
2
because we'll want this in places that do not ordinarily have
3
access to all of tcg/tcg.h.
3
4
4
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
5
Reviewed-by: WANG Xuerui <git@xen0n.name>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
10
---
9
vl.c | 3 ++-
11
tcg/aarch64/tcg-target-sa32.h | 1 +
10
1 file changed, 2 insertions(+), 1 deletion(-)
12
tcg/arm/tcg-target-sa32.h | 1 +
13
tcg/i386/tcg-target-sa32.h | 1 +
14
tcg/loongarch64/tcg-target-sa32.h | 1 +
15
tcg/mips/tcg-target-sa32.h | 1 +
16
tcg/ppc/tcg-target-sa32.h | 1 +
17
tcg/riscv/tcg-target-sa32.h | 1 +
18
tcg/s390x/tcg-target-sa32.h | 1 +
19
tcg/sparc/tcg-target-sa32.h | 1 +
20
tcg/tci/tcg-target-sa32.h | 1 +
21
tcg/tcg.c | 4 ++++
22
11 files changed, 14 insertions(+)
23
create mode 100644 tcg/aarch64/tcg-target-sa32.h
24
create mode 100644 tcg/arm/tcg-target-sa32.h
25
create mode 100644 tcg/i386/tcg-target-sa32.h
26
create mode 100644 tcg/loongarch64/tcg-target-sa32.h
27
create mode 100644 tcg/mips/tcg-target-sa32.h
28
create mode 100644 tcg/ppc/tcg-target-sa32.h
29
create mode 100644 tcg/riscv/tcg-target-sa32.h
30
create mode 100644 tcg/s390x/tcg-target-sa32.h
31
create mode 100644 tcg/sparc/tcg-target-sa32.h
32
create mode 100644 tcg/tci/tcg-target-sa32.h
11
33
12
diff --git a/vl.c b/vl.c
34
diff --git a/tcg/aarch64/tcg-target-sa32.h b/tcg/aarch64/tcg-target-sa32.h
35
new file mode 100644
36
index XXXXXXX..XXXXXXX
37
--- /dev/null
38
+++ b/tcg/aarch64/tcg-target-sa32.h
39
@@ -0,0 +1 @@
40
+#define TCG_TARGET_SIGNED_ADDR32 0
41
diff --git a/tcg/arm/tcg-target-sa32.h b/tcg/arm/tcg-target-sa32.h
42
new file mode 100644
43
index XXXXXXX..XXXXXXX
44
--- /dev/null
45
+++ b/tcg/arm/tcg-target-sa32.h
46
@@ -0,0 +1 @@
47
+#define TCG_TARGET_SIGNED_ADDR32 0
48
diff --git a/tcg/i386/tcg-target-sa32.h b/tcg/i386/tcg-target-sa32.h
49
new file mode 100644
50
index XXXXXXX..XXXXXXX
51
--- /dev/null
52
+++ b/tcg/i386/tcg-target-sa32.h
53
@@ -0,0 +1 @@
54
+#define TCG_TARGET_SIGNED_ADDR32 0
55
diff --git a/tcg/loongarch64/tcg-target-sa32.h b/tcg/loongarch64/tcg-target-sa32.h
56
new file mode 100644
57
index XXXXXXX..XXXXXXX
58
--- /dev/null
59
+++ b/tcg/loongarch64/tcg-target-sa32.h
60
@@ -0,0 +1 @@
61
+#define TCG_TARGET_SIGNED_ADDR32 0
62
diff --git a/tcg/mips/tcg-target-sa32.h b/tcg/mips/tcg-target-sa32.h
63
new file mode 100644
64
index XXXXXXX..XXXXXXX
65
--- /dev/null
66
+++ b/tcg/mips/tcg-target-sa32.h
67
@@ -0,0 +1 @@
68
+#define TCG_TARGET_SIGNED_ADDR32 0
69
diff --git a/tcg/ppc/tcg-target-sa32.h b/tcg/ppc/tcg-target-sa32.h
70
new file mode 100644
71
index XXXXXXX..XXXXXXX
72
--- /dev/null
73
+++ b/tcg/ppc/tcg-target-sa32.h
74
@@ -0,0 +1 @@
75
+#define TCG_TARGET_SIGNED_ADDR32 0
76
diff --git a/tcg/riscv/tcg-target-sa32.h b/tcg/riscv/tcg-target-sa32.h
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/tcg/riscv/tcg-target-sa32.h
81
@@ -0,0 +1 @@
82
+#define TCG_TARGET_SIGNED_ADDR32 0
83
diff --git a/tcg/s390x/tcg-target-sa32.h b/tcg/s390x/tcg-target-sa32.h
84
new file mode 100644
85
index XXXXXXX..XXXXXXX
86
--- /dev/null
87
+++ b/tcg/s390x/tcg-target-sa32.h
88
@@ -0,0 +1 @@
89
+#define TCG_TARGET_SIGNED_ADDR32 0
90
diff --git a/tcg/sparc/tcg-target-sa32.h b/tcg/sparc/tcg-target-sa32.h
91
new file mode 100644
92
index XXXXXXX..XXXXXXX
93
--- /dev/null
94
+++ b/tcg/sparc/tcg-target-sa32.h
95
@@ -0,0 +1 @@
96
+#define TCG_TARGET_SIGNED_ADDR32 0
97
diff --git a/tcg/tci/tcg-target-sa32.h b/tcg/tci/tcg-target-sa32.h
98
new file mode 100644
99
index XXXXXXX..XXXXXXX
100
--- /dev/null
101
+++ b/tcg/tci/tcg-target-sa32.h
102
@@ -0,0 +1 @@
103
+#define TCG_TARGET_SIGNED_ADDR32 0
104
diff --git a/tcg/tcg.c b/tcg/tcg.c
13
index XXXXXXX..XXXXXXX 100644
105
index XXXXXXX..XXXXXXX 100644
14
--- a/vl.c
106
--- a/tcg/tcg.c
15
+++ b/vl.c
107
+++ b/tcg/tcg.c
16
@@ -XXX,XX +XXX,XX @@ static int do_configure_accelerator(void *opaque, QemuOpts *opts, Error **errp)
108
@@ -XXX,XX +XXX,XX @@
17
static void configure_accelerators(const char *progname)
109
#include "exec/log.h"
18
{
110
#include "tcg/tcg-ldst.h"
19
const char *accel;
111
#include "tcg-internal.h"
20
- char **accel_list, **tmp;
112
+#include "tcg-target-sa32.h"
21
bool init_failed = false;
22
23
qemu_opts_foreach(qemu_find_opts("icount"),
24
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
25
26
accel = qemu_opt_get(qemu_get_machine_opts(), "accel");
27
if (QTAILQ_EMPTY(&qemu_accel_opts.head)) {
28
+ char **accel_list, **tmp;
29
+
113
+
30
if (accel == NULL) {
114
+/* Sanity check for TCG_TARGET_SIGNED_ADDR32. */
31
/* Select the default accelerator */
115
+QEMU_BUILD_BUG_ON(TCG_TARGET_REG_BITS == 32 && TCG_TARGET_SIGNED_ADDR32);
32
if (!accel_find("tcg") && !accel_find("kvm")) {
116
117
#ifdef CONFIG_TCG_INTERPRETER
118
#include <ffi.h>
33
--
119
--
34
2.20.1
120
2.25.1
35
121
36
122
1
There is only one caller for tlb_table_flush_by_mmuidx. Place
1
Create a new function to combine a CPUTLBEntry addend
2
the result at the earlier line number, due to an expected user
2
with the guest address to form a host address.
3
in the near future.
4
3
4
Reviewed-by: WANG Xuerui <git@xen0n.name>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
---
9
accel/tcg/cputlb.c | 19 +++++++------------
10
accel/tcg/cputlb.c | 24 ++++++++++++++----------
10
1 file changed, 7 insertions(+), 12 deletions(-)
11
1 file changed, 14 insertions(+), 10 deletions(-)
11
12
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
13
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/accel/tcg/cputlb.c
15
--- a/accel/tcg/cputlb.c
15
+++ b/accel/tcg/cputlb.c
16
+++ b/accel/tcg/cputlb.c
16
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
17
@@ -XXX,XX +XXX,XX @@ static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
18
return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
19
}
20
21
+static inline uintptr_t g2h_tlbe(const CPUTLBEntry *tlb, target_ulong gaddr)
22
+{
23
+ return tlb->addend + (uintptr_t)gaddr;
24
+}
25
+
26
static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
27
size_t max_entries)
28
{
29
@@ -XXX,XX +XXX,XX @@ static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
30
31
if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
32
TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
33
- addr &= TARGET_PAGE_MASK;
34
- addr += tlb_entry->addend;
35
+ addr = g2h_tlbe(tlb_entry, addr & TARGET_PAGE_MASK);
36
if ((addr - start) < length) {
37
#if TCG_OVERSIZED_GUEST
38
tlb_entry->addr_write |= TLB_NOTDIRTY;
39
@@ -XXX,XX +XXX,XX @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
40
return -1;
17
}
41
}
42
43
- p = (void *)((uintptr_t)addr + entry->addend);
44
+ p = (void *)g2h_tlbe(entry, addr);
45
if (hostp) {
46
*hostp = p;
47
}
48
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
49
}
50
51
/* Everything else is RAM. */
52
- *phost = (void *)((uintptr_t)addr + entry->addend);
53
+ *phost = (void *)g2h_tlbe(entry, addr);
54
return flags;
18
}
55
}
19
56
20
-static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
57
@@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
21
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
58
data->v.io.offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
22
{
59
} else {
23
tlb_mmu_resize_locked(env, mmu_idx);
60
data->is_io = false;
24
- memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
61
- data->v.ram.hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
25
env_tlb(env)->d[mmu_idx].n_used_entries = 0;
62
+ data->v.ram.hostaddr = (void *)g2h_tlbe(tlbe, addr);
26
+ env_tlb(env)->d[mmu_idx].large_page_addr = -1;
63
}
27
+ env_tlb(env)->d[mmu_idx].large_page_mask = -1;
64
return true;
28
+ env_tlb(env)->d[mmu_idx].vindex = 0;
65
} else {
29
+ memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
66
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
30
+ memset(env_tlb(env)->d[mmu_idx].vtable, -1,
67
goto stop_the_world;
31
+ sizeof(env_tlb(env)->d[0].vtable));
68
}
69
70
- hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
71
+ hostaddr = (void *)g2h_tlbe(tlbe, addr);
72
73
if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
74
notdirty_write(env_cpu(env), addr, size,
75
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
76
access_type, op ^ (need_swap * MO_BSWAP));
77
}
78
79
- haddr = (void *)((uintptr_t)addr + entry->addend);
80
+ haddr = (void *)g2h_tlbe(entry, addr);
81
82
/*
83
* Keep these two load_memop separate to ensure that the compiler
84
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
85
return res & MAKE_64BIT_MASK(0, size * 8);
86
}
87
88
- haddr = (void *)((uintptr_t)addr + entry->addend);
89
+ haddr = (void *)g2h_tlbe(entry, addr);
90
return load_memop(haddr, op);
32
}
91
}
33
92
34
static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
93
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
35
@@ -XXX,XX +XXX,XX @@ void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
94
notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
36
*pelide = elide;
95
}
96
97
- haddr = (void *)((uintptr_t)addr + entry->addend);
98
+ haddr = (void *)g2h_tlbe(entry, addr);
99
100
/*
101
* Keep these two store_memop separate to ensure that the compiler
102
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
103
return;
104
}
105
106
- haddr = (void *)((uintptr_t)addr + entry->addend);
107
+ haddr = (void *)g2h_tlbe(entry, addr);
108
store_memop(haddr, val, op);
37
}
109
}
38
110
39
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
40
-{
41
- tlb_table_flush_by_mmuidx(env, mmu_idx);
42
- env_tlb(env)->d[mmu_idx].large_page_addr = -1;
43
- env_tlb(env)->d[mmu_idx].large_page_mask = -1;
44
- env_tlb(env)->d[mmu_idx].vindex = 0;
45
- memset(env_tlb(env)->d[mmu_idx].vtable, -1,
46
- sizeof(env_tlb(env)->d[0].vtable));
47
-}
48
-
49
static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
50
{
51
CPUArchState *env = cpu->env_ptr;
52
--
111
--
53
2.20.1
112
2.25.1
54
113
55
114
1
In target/arm we will shortly have "too many" mmu_idx.
1
When TCG_TARGET_SIGNED_ADDR32 is set, adjust the tlb addend to
2
The current minimum barrier is caused by the way in which
2
allow the 32-bit guest address to be sign extended within the
3
tlb_flush_page_by_mmuidx is coded.
3
64-bit host register instead of zero extended.
4
4
5
We can remove this limitation by allocating memory for
5
This will simplify tcg hosts like MIPS, RISC-V, and LoongArch,
6
consumption by the worker. Let us assume that this is
6
which naturally sign-extend 32-bit values, in contrast to x86_64
7
the unlikely case, as will be the case for the majority
7
and AArch64 which zero-extend them.
8
of targets which have so far satisfied the BUILD_BUG_ON,
9
and only allocate memory when necessary.
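For the sign-extension part of this change, the adjusted addend and the (int32_t) cast cancel, so the host address produced for any access on the page is unchanged; a hedged standalone check with made-up values (the usual two's-complement int32_t conversion is assumed):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t host_page  = 0x7f1234560000ull;   /* hypothetical host mapping */
        uint32_t vaddr_page = 0x80001000u;         /* "negative" as an int32_t */
        uint32_t gaddr      = vaddr_page | 0x123;  /* same page, some offset */

        /* Zero-extending addend versus sign-extending addend. */
        uint64_t add_zero = host_page - (uint64_t)vaddr_page;
        uint64_t add_sign = host_page - (uint64_t)(int64_t)(int32_t)vaddr_page;

        assert(add_zero + (uint64_t)gaddr ==
               add_sign + (uint64_t)(int64_t)(int32_t)gaddr);
        return 0;
    }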
10
8
11
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
10
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
12
---
14
accel/tcg/cputlb.c | 167 +++++++++++++++++++++++++++++++++++----------
13
accel/tcg/cputlb.c | 12 +++++++++++-
15
1 file changed, 132 insertions(+), 35 deletions(-)
14
1 file changed, 11 insertions(+), 1 deletion(-)
16
15
17
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
16
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
18
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
19
--- a/accel/tcg/cputlb.c
18
--- a/accel/tcg/cputlb.c
20
+++ b/accel/tcg/cputlb.c
19
+++ b/accel/tcg/cputlb.c
21
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
20
@@ -XXX,XX +XXX,XX @@
22
}
21
#include "qemu/plugin-memory.h"
22
#endif
23
#include "tcg/tcg-ldst.h"
24
+#include "tcg-target-sa32.h"
25
26
/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
27
/* #define DEBUG_TLB */
28
@@ -XXX,XX +XXX,XX @@ static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
29
30
static inline uintptr_t g2h_tlbe(const CPUTLBEntry *tlb, target_ulong gaddr)
31
{
32
+ if (TCG_TARGET_SIGNED_ADDR32 && TARGET_LONG_BITS == 32) {
33
+ return tlb->addend + (int32_t)gaddr;
34
+ }
35
return tlb->addend + (uintptr_t)gaddr;
23
}
36
}
24
37
25
-/* As we are going to hijack the bottom bits of the page address for a
38
@@ -XXX,XX +XXX,XX @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
26
- * mmuidx bit mask we need to fail to build if we can't do that
39
desc->iotlb[index].attrs = attrs;
27
+/**
40
28
+ * tlb_flush_page_by_mmuidx_async_0:
41
/* Now calculate the new entry */
29
+ * @cpu: cpu on which to flush
42
- tn.addend = addend - vaddr_page;
30
+ * @addr: page of virtual address to flush
31
+ * @idxmap: set of mmu_idx to flush
32
+ *
33
+ * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
34
+ * at @addr from the tlbs indicated by @idxmap from @cpu.
35
*/
36
-QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
37
-
38
-static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
39
- run_on_cpu_data data)
40
+static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
41
+ target_ulong addr,
42
+ uint16_t idxmap)
43
{
44
CPUArchState *env = cpu->env_ptr;
45
- target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
46
- target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
47
- unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
48
int mmu_idx;
49
50
assert_cpu_is_self(cpu);
51
52
- tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
53
- addr, mmu_idx_bitmap);
54
+ tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%x\n", addr, idxmap);
55
56
qemu_spin_lock(&env_tlb(env)->c.lock);
57
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
58
- if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
59
+ if ((idxmap >> mmu_idx) & 1) {
60
tlb_flush_page_locked(env, mmu_idx, addr);
61
}
62
}
63
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
64
tb_flush_jmp_cache(cpu, addr);
65
}
66
67
+/**
68
+ * tlb_flush_page_by_mmuidx_async_1:
69
+ * @cpu: cpu on which to flush
70
+ * @data: encoded addr + idxmap
71
+ *
72
+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
73
+ * async_run_on_cpu. The idxmap parameter is encoded in the page
74
+ * offset of the target_ptr field. This limits the set of mmu_idx
75
+ * that can be passed via this method.
76
+ */
77
+static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
78
+ run_on_cpu_data data)
79
+{
80
+ target_ulong addr_and_idxmap = (target_ulong) data.target_ptr;
81
+ target_ulong addr = addr_and_idxmap & TARGET_PAGE_MASK;
82
+ uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
83
+
43
+
84
+ tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
44
+ if (TCG_TARGET_SIGNED_ADDR32 && TARGET_LONG_BITS == 32) {
85
+}
45
+ tn.addend = addend - (int32_t)vaddr_page;
86
+
87
+typedef struct {
88
+ target_ulong addr;
89
+ uint16_t idxmap;
90
+} TLBFlushPageByMMUIdxData;
91
+
92
+/**
93
+ * tlb_flush_page_by_mmuidx_async_2:
94
+ * @cpu: cpu on which to flush
95
+ * @data: allocated addr + idxmap
96
+ *
97
+ * Helper for tlb_flush_page_by_mmuidx and friends, called through
98
+ * async_run_on_cpu. The addr+idxmap parameters are stored in a
99
+ * TLBFlushPageByMMUIdxData structure that has been allocated
100
+ * specifically for this helper. Free the structure when done.
101
+ */
102
+static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
103
+ run_on_cpu_data data)
104
+{
105
+ TLBFlushPageByMMUIdxData *d = data.host_ptr;
106
+
107
+ tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
108
+ g_free(d);
109
+}
110
+
111
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
112
{
113
- target_ulong addr_and_mmu_idx;
114
-
115
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
116
117
/* This should already be page aligned */
118
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
119
- addr_and_mmu_idx |= idxmap;
120
+ addr &= TARGET_PAGE_MASK;
121
122
- if (!qemu_cpu_is_self(cpu)) {
123
- async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
124
- RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
125
+ if (qemu_cpu_is_self(cpu)) {
126
+ tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
127
+ } else if (idxmap < TARGET_PAGE_SIZE) {
128
+ /*
129
+ * Most targets have only a few mmu_idx. In the case where
130
+ * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
131
+ * allocating memory for this operation.
132
+ */
133
+ async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
134
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
135
} else {
136
- tlb_flush_page_by_mmuidx_async_work(
137
- cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
138
+ TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
139
+
140
+ /* Otherwise allocate a structure, freed by the worker. */
141
+ d->addr = addr;
142
+ d->idxmap = idxmap;
143
+ async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
144
+ RUN_ON_CPU_HOST_PTR(d));
145
}
146
}
147
148
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
149
void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
150
uint16_t idxmap)
151
{
152
- const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
153
- target_ulong addr_and_mmu_idx;
154
-
155
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
156
157
/* This should already be page aligned */
158
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
159
- addr_and_mmu_idx |= idxmap;
160
+ addr &= TARGET_PAGE_MASK;
161
162
- flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
163
- fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
164
+ /*
165
+ * Allocate memory to hold addr+idxmap only when needed.
166
+ * See tlb_flush_page_by_mmuidx for details.
167
+ */
168
+ if (idxmap < TARGET_PAGE_SIZE) {
169
+ flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
170
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
171
+ } else {
46
+ } else {
172
+ CPUState *dst_cpu;
47
+ tn.addend = addend - vaddr_page;
173
+
174
+ /* Allocate a separate data block for each destination cpu. */
175
+ CPU_FOREACH(dst_cpu) {
176
+ if (dst_cpu != src_cpu) {
177
+ TLBFlushPageByMMUIdxData *d
178
+ = g_new(TLBFlushPageByMMUIdxData, 1);
179
+
180
+ d->addr = addr;
181
+ d->idxmap = idxmap;
182
+ async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
183
+ RUN_ON_CPU_HOST_PTR(d));
184
+ }
185
+ }
186
+ }
48
+ }
187
+
49
+
188
+ tlb_flush_page_by_mmuidx_async_0(src_cpu, addr, idxmap);
50
if (prot & PAGE_READ) {
189
}
51
tn.addr_read = address;
190
52
if (wp_flags & BP_MEM_READ) {
191
void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
192
@@ -XXX,XX +XXX,XX @@ void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
193
target_ulong addr,
194
uint16_t idxmap)
195
{
196
- const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
197
- target_ulong addr_and_mmu_idx;
198
-
199
tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
200
201
/* This should already be page aligned */
202
- addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
203
- addr_and_mmu_idx |= idxmap;
204
+ addr &= TARGET_PAGE_MASK;
205
206
- flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
207
- async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
208
+ /*
209
+ * Allocate memory to hold addr+idxmap only when needed.
210
+ * See tlb_flush_page_by_mmuidx for details.
211
+ */
212
+ if (idxmap < TARGET_PAGE_SIZE) {
213
+ flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
214
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
215
+ async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
216
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
217
+ } else {
218
+ CPUState *dst_cpu;
219
+ TLBFlushPageByMMUIdxData *d;
220
+
221
+ /* Allocate a separate data block for each destination cpu. */
222
+ CPU_FOREACH(dst_cpu) {
223
+ if (dst_cpu != src_cpu) {
224
+ d = g_new(TLBFlushPageByMMUIdxData, 1);
225
+ d->addr = addr;
226
+ d->idxmap = idxmap;
227
+ async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
228
+ RUN_ON_CPU_HOST_PTR(d));
229
+ }
230
+ }
231
+
232
+ d = g_new(TLBFlushPageByMMUIdxData, 1);
233
+ d->addr = addr;
234
+ d->idxmap = idxmap;
235
+ async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
236
+ RUN_ON_CPU_HOST_PTR(d));
237
+ }
238
}
239
240
void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
241
--
53
--
242
2.20.1
54
2.25.1
243
55
244
56
1
There are no users of this function outside cputlb.c,
1
While the host may prefer to treat 32-bit addresses as signed,
2
and its interface will change in the next patch.
2
there are edge cases of guests that cannot be implemented with
3
addresses 0x7fff_ffff and 0x8000_0000 being non-consecutive.
3
4
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Therefore, default to guest_base_signed_addr32 false, and allow
5
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
6
probe_guest_base to determine whether it is possible to set it
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
to true. A tcg backend which sets TCG_TARGET_SIGNED_ADDR32 will
8
have to cope with either setting for user-only.
9
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
11
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
13
---
9
include/exec/cpu_ldst.h | 5 -----
14
include/exec/cpu-all.h | 16 ++++++++++++++++
10
accel/tcg/cputlb.c | 5 +++++
15
include/exec/cpu_ldst.h | 3 ++-
11
2 files changed, 5 insertions(+), 5 deletions(-)
16
bsd-user/main.c | 4 ++++
17
linux-user/main.c | 3 +++
18
4 files changed, 25 insertions(+), 1 deletion(-)
12
19
20
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/exec/cpu-all.h
23
+++ b/include/exec/cpu-all.h
24
@@ -XXX,XX +XXX,XX @@ static inline void tswap64s(uint64_t *s)
25
26
#if defined(CONFIG_USER_ONLY)
27
#include "exec/user/abitypes.h"
28
+#include "tcg-target-sa32.h"
29
30
/* On some host systems the guest address space is reserved on the host.
31
* This allows the guest address space to be offset to a convenient location.
32
@@ -XXX,XX +XXX,XX @@ extern uintptr_t guest_base;
33
extern bool have_guest_base;
34
extern unsigned long reserved_va;
35
36
+#if TCG_TARGET_SIGNED_ADDR32 && TARGET_LONG_BITS == 32
37
+extern bool guest_base_signed_addr32;
38
+#else
39
+#define guest_base_signed_addr32 false
40
+#endif
41
+
42
+static inline void set_guest_base_signed_addr32(void)
43
+{
44
+#ifdef guest_base_signed_addr32
45
+ qemu_build_not_reached();
46
+#else
47
+ guest_base_signed_addr32 = true;
48
+#endif
49
+}
50
+
51
/*
52
* Limit the guest addresses as best we can.
53
*
13
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
54
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
14
index XXXXXXX..XXXXXXX 100644
55
index XXXXXXX..XXXXXXX 100644
15
--- a/include/exec/cpu_ldst.h
56
--- a/include/exec/cpu_ldst.h
16
+++ b/include/exec/cpu_ldst.h
57
+++ b/include/exec/cpu_ldst.h
17
@@ -XXX,XX +XXX,XX @@ static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
58
@@ -XXX,XX +XXX,XX @@ static inline abi_ptr cpu_untagged_addr(CPUState *cs, abi_ptr x)
18
return (addr >> TARGET_PAGE_BITS) & size_mask;
59
/* All direct uses of g2h and h2g need to go away for usermode softmmu. */
60
static inline void *g2h_untagged(abi_ptr x)
61
{
62
- return (void *)((uintptr_t)(x) + guest_base);
63
+ uintptr_t hx = guest_base_signed_addr32 ? (int32_t)x : (uintptr_t)x;
64
+ return (void *)(guest_base + hx);
19
}
65
}
20
66
21
-static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
67
static inline void *g2h(CPUState *cs, abi_ptr x)
22
-{
68
diff --git a/bsd-user/main.c b/bsd-user/main.c
23
- return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
24
-}
25
-
26
/* Find the TLB entry corresponding to the mmu_idx + address pair. */
27
static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
28
target_ulong addr)
29
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
30
index XXXXXXX..XXXXXXX 100644
69
index XXXXXXX..XXXXXXX 100644
31
--- a/accel/tcg/cputlb.c
70
--- a/bsd-user/main.c
32
+++ b/accel/tcg/cputlb.c
71
+++ b/bsd-user/main.c
33
@@ -XXX,XX +XXX,XX @@ QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));
72
@@ -XXX,XX +XXX,XX @@
34
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
73
int singlestep;
35
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
74
uintptr_t guest_base;
36
75
bool have_guest_base;
37
+static inline size_t tlb_n_entries(CPUArchState *env, uintptr_t mmu_idx)
76
+#ifndef guest_base_signed_addr32
38
+{
77
+bool guest_base_signed_addr32;
39
+ return (env_tlb(env)->f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS) + 1;
78
+#endif
40
+}
41
+
79
+
42
static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
80
/*
43
{
81
* When running 32-on-64 we should make sure we can fit all of the possible
44
return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
82
* guest address space into a contiguous chunk of virtual host memory.
83
diff --git a/linux-user/main.c b/linux-user/main.c
84
index XXXXXXX..XXXXXXX 100644
85
--- a/linux-user/main.c
86
+++ b/linux-user/main.c
87
@@ -XXX,XX +XXX,XX @@ static const char *seed_optarg;
88
unsigned long mmap_min_addr;
89
uintptr_t guest_base;
90
bool have_guest_base;
91
+#ifndef guest_base_signed_addr32
92
+bool guest_base_signed_addr32;
93
+#endif
94
95
/*
96
* Used to implement backwards-compatibility for the `-strace`, and
45
--
97
--
46
2.20.1
98
2.25.1
47
99
48
100
1
The accel_initialised variable no longer has any setters.
1
When using reserved_va, which is the default for a 64-bit host
2
and a 32-bit guest, set guest_base_signed_addr32 if requested
3
by TCG_TARGET_SIGNED_ADDR32, and the executable layout allows.
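A hedged arithmetic sketch of the resulting layout: with the full 4GiB of guest address space reserved and guest_base advanced by 2GiB, every signed 32-bit guest address falls inside the reservation (all addresses are made up):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t resv = 0x7f0000000000ull;       /* hypothetical 4GiB reservation */
        uint64_t base = resv + 0x80000000ull;    /* guest_base after the += 0x80000000 */

        /* host = base + (int32_t)guest stays within [resv, resv + 4GiB). */
        assert(base + 0x00000000ull                == resv + 0x080000000ull);  /* guest 0 */
        assert(base + 0x7fffffffull                == resv + 0x0ffffffffull);  /* guest 0x7fff_ffff */
        assert(base + (uint64_t)(int64_t)INT32_MIN == resv);                   /* guest 0x8000_0000 */
        assert(base + (uint64_t)(int64_t)-1        == resv + 0x07fffffffull);  /* guest 0xffff_ffff */
        return 0;
    }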
2
4
3
Fixes: 6f6e1698a68c
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Reviewed by: Aleksandar Markovic <amarkovic@wavecomp.com>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
8
---
10
vl.c | 3 +--
9
include/exec/cpu-all.h | 4 ---
11
1 file changed, 1 insertion(+), 2 deletions(-)
10
linux-user/elfload.c | 62 ++++++++++++++++++++++++++++++++++--------
11
2 files changed, 50 insertions(+), 16 deletions(-)
12
12
13
diff --git a/vl.c b/vl.c
13
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
14
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
15
--- a/vl.c
15
--- a/include/exec/cpu-all.h
16
+++ b/vl.c
16
+++ b/include/exec/cpu-all.h
17
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
17
@@ -XXX,XX +XXX,XX @@ extern const TargetPageBits target_page;
18
#define PAGE_RESET 0x0040
19
/* For linux-user, indicates that the page is MAP_ANON. */
20
#define PAGE_ANON 0x0080
21
-
22
-#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
23
-/* FIXME: Code that sets/uses this is broken and needs to go away. */
24
#define PAGE_RESERVED 0x0100
25
-#endif
26
/* Target-specific bits that will be used via page_get_flags(). */
27
#define PAGE_TARGET_1 0x0200
28
#define PAGE_TARGET_2 0x0400
29
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/linux-user/elfload.c
32
+++ b/linux-user/elfload.c
33
 {
     const char *accel;
     char **accel_list, **tmp;
-    bool accel_initialised = false;
     bool init_failed = false;
 
     qemu_opts_foreach(qemu_find_opts("icount"),
@@ -XXX,XX +XXX,XX @@ static void configure_accelerators(const char *progname)
 
     accel_list = g_strsplit(accel, ":", 0);
 
-    for (tmp = accel_list; !accel_initialised && tmp && *tmp; tmp++) {
+    for (tmp = accel_list; tmp && *tmp; tmp++) {
         /*
          * Filter invalid accelerators here, to prevent obscenities
          * such as "-machine accel=tcg,,thread=single".
--
2.20.1

@@ -XXX,XX +XXX,XX @@ static void pgb_dynamic(const char *image_name, long align)
 
 static void pgb_reserved_va(const char *image_name, abi_ulong guest_loaddr,
                             abi_ulong guest_hiaddr, long align)
 {
-    int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
+    int flags = (MAP_ANONYMOUS | MAP_PRIVATE |
+                 MAP_NORESERVE | MAP_FIXED_NOREPLACE);
+    unsigned long local_rva = reserved_va;
+    bool protect_wrap = false;
     void *addr, *test;
 
-    if (guest_hiaddr > reserved_va) {
+    if (guest_hiaddr > local_rva) {
         error_report("%s: requires more than reserved virtual "
                      "address space (0x%" PRIx64 " > 0x%lx)",
-                     image_name, (uint64_t)guest_hiaddr, reserved_va);
+                     image_name, (uint64_t)guest_hiaddr, local_rva);
         exit(EXIT_FAILURE);
     }
 
-    /* Widen the "image" to the entire reserved address space. */
-    pgb_static(image_name, 0, reserved_va, align);
+    if (TCG_TARGET_SIGNED_ADDR32 && TARGET_LONG_BITS == 32) {
+        if (guest_loaddr < 0x80000000u && guest_hiaddr > 0x80000000u) {
+            /*
+             * The executable itself wraps on signed addresses.
+             * Without per-page translation, we must keep the
+             * guest address 0x7fff_ffff adjacent to 0x8000_0000
+             * consecutive in host memory: unsigned addresses.
+             */
+        } else {
+            set_guest_base_signed_addr32();
+            if (local_rva <= 0x80000000u) {
+                /* No guest addresses are "negative": win! */
+            } else {
+                /* Begin by allocating the entire address space. */
+                local_rva = 0xfffffffful + 1;
+                protect_wrap = true;
+            }
+        }
+    }
 
-    /* osdep.h defines this as 0 if it's missing */
-    flags |= MAP_FIXED_NOREPLACE;
+    /* Widen the "image" to the entire reserved address space. */
+    pgb_static(image_name, 0, local_rva, align);
+    assert(guest_base != 0);
 
     /* Reserve the memory on the host. */
-    assert(guest_base != 0);
     test = g2h_untagged(0);
-    addr = mmap(test, reserved_va, PROT_NONE, flags, -1, 0);
+    addr = mmap(test, local_rva, PROT_NONE, flags, -1, 0);
     if (addr == MAP_FAILED || addr != test) {
+        /*
+         * If protect_wrap, we could try again with the original reserved_va
+         * setting, but the edge case of low ulimit vm setting on a 64-bit
+         * host is probably useless.
+         */
         error_report("Unable to reserve 0x%lx bytes of virtual address "
-                     "space at %p (%s) for use as guest address space (check your"
-                     "virtual memory ulimit setting, min_mmap_addr or reserve less "
-                     "using -R option)", reserved_va, test, strerror(errno));
+                     "space at %p (%s) for use as guest address space "
+                     "(check your virtual memory ulimit setting, "
+                     "min_mmap_addr or reserve less using -R option)",
+                     local_rva, test, strerror(errno));
         exit(EXIT_FAILURE);
     }
 
+    if (protect_wrap) {
+        /*
+         * Prevent the page just before 0x80000000 from being allocated.
+         * This prevents a single guest object/allocation from crossing
+         * the signed wrap, and thus being discontiguous in host memory.
+         */
+        page_set_flags(0x7fffffff & TARGET_PAGE_MASK, 0x80000000u,
+                       PAGE_RESERVED);
+        /* Adjust guest_base so that 0 is in the middle of the reservation. */
+        guest_base += 0x80000000ul;
+    }
+
     qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %p for %lu bytes\n",
                   __func__, addr, reserved_va);
 }
--
2.25.1
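As an informal illustration of the layout the pgb_reserved_va() hunk above arranges (this sketch is not part of the patch; example_guest_base and example_g2h are made-up stand-ins for the real guest_base and g2h machinery): once guest_base has been biased by 0x80000000, a 32-bit guest address is applied as a signed offset, so guest page 0 sits in the middle of the 4GiB reservation, the high half 0x80000000..0xffffffff maps below it, and the page just under 0x80000000 stays reserved so no single allocation can straddle the signed wrap.

    #include <stdint.h>

    /* Hypothetical stand-in for the biased guest_base described above. */
    static uintptr_t example_guest_base;

    static inline void *example_g2h(uint32_t guest_addr)
    {
        /* Apply the guest address as a signed 32-bit offset from the base. */
        return (void *)(example_guest_base + (intptr_t)(int32_t)guest_addr);
    }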
AArch64 has both sign and zero-extending addressing modes, which
means that either treatment of guest addresses is equally efficient.
Enabling this for AArch64 gives us testing of the feature in CI.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/aarch64/tcg-target-sa32.h |  8 +++-
 tcg/aarch64/tcg-target.c.inc  | 81 ++++++++++++++++++++++++-----------
 2 files changed, 64 insertions(+), 25 deletions(-)

diff --git a/tcg/aarch64/tcg-target-sa32.h b/tcg/aarch64/tcg-target-sa32.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target-sa32.h
+++ b/tcg/aarch64/tcg-target-sa32.h
@@ -1 +1,7 @@
-#define TCG_TARGET_SIGNED_ADDR32 0
+/*
+ * AArch64 has both SXTW and UXTW addressing modes, which means that
+ * it is agnostic to how guest addresses should be represented.
+ * Because aarch64 is more common than the other hosts that will
+ * want to use this feature, enable it for continuous testing.
+ */
+#define TCG_TARGET_SIGNED_ADDR32 1
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ typedef enum {
     LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
 } AArch64LdstType;
 
+/*
+ * See aarch64/instrs/extendreg/DecodeRegExtend
+ * But note that option<1> == 0 is UNDEFINED for LDR/STR.
+ */
+typedef enum {
+    LDST_EXT_UXTW = 2,  /* zero-extend from uint32_t */
+    LDST_EXT_UXTX = 3,  /* zero-extend from uint64_t (i.e. no extension) */
+    LDST_EXT_SXTW = 6,  /* sign-extend from int32_t */
+} AArch64LdstExt;
+
 /* We encode the format of the insn into the beginning of the name, so that
    we can have the preprocessor help "typecheck" the insn vs the output
    function.  Arm didn't provide us with nice names for the formats, so we
@@ -XXX,XX +XXX,XX @@ static void tcg_out_insn_3617(TCGContext *s, AArch64Insn insn, bool q,
 }
 
 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
-                              TCGReg rd, TCGReg base, TCGType ext,
+                              TCGReg rd, TCGReg base, AArch64LdstExt option,
                               TCGReg regoff)
 {
     /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
     tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
-              0x4000 | ext << 13 | base << 5 | (rd & 0x1f));
+              option << 13 | base << 5 | (rd & 0x1f));
 }
 
 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ldst(TCGContext *s, AArch64Insn insn, TCGReg rd,
 
     /* Worst-case scenario, move offset to temp register, use reg offset.  */
     tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
-    tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
+    tcg_out_ldst_r(s, insn, rd, rn, LDST_EXT_UXTX, TCG_REG_TMP);
 }
 
 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
                                    TCGReg data_r, TCGReg addr_r,
-                                   TCGType otype, TCGReg off_r)
+                                   AArch64LdstExt option, TCGReg off_r)
 {
     switch (memop & MO_SSIZE) {
     case MO_UB:
-        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
+        tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, option, off_r);
         break;
     case MO_SB:
         tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
-                       data_r, addr_r, otype, off_r);
+                       data_r, addr_r, option, off_r);
         break;
     case MO_UW:
-        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
+        tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, option, off_r);
         break;
     case MO_SW:
         tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
-                       data_r, addr_r, otype, off_r);
+                       data_r, addr_r, option, off_r);
         break;
     case MO_UL:
-        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
+        tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, option, off_r);
         break;
     case MO_SL:
-        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
+        tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, option, off_r);
         break;
     case MO_UQ:
-        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
+        tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, option, off_r);
         break;
     default:
         tcg_abort();
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
 
 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
                                    TCGReg data_r, TCGReg addr_r,
-                                   TCGType otype, TCGReg off_r)
+                                   AArch64LdstExt option, TCGReg off_r)
 {
     switch (memop & MO_SIZE) {
     case MO_8:
-        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
+        tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, option, off_r);
         break;
     case MO_16:
-        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
+        tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, option, off_r);
         break;
     case MO_32:
-        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
+        tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, option, off_r);
         break;
     case MO_64:
-        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
+        tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, option, off_r);
         break;
     default:
         tcg_abort();
     }
 }
 
+/*
+ * Bits for the option field of LDR/STR (register),
+ * for application to a guest address.
+ */
+static AArch64LdstExt ldst_ext_option(void)
+{
+#ifdef CONFIG_USER_ONLY
+    bool signed_addr32 = guest_base_signed_addr32;
+#else
+    bool signed_addr32 = TCG_TARGET_SIGNED_ADDR32;
+#endif
+
+    if (TARGET_LONG_BITS == 64) {
+        return LDST_EXT_UXTX;
+    } else if (signed_addr32) {
+        return LDST_EXT_SXTW;
+    } else {
+        return LDST_EXT_UXTW;
+    }
+}
+
 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                             MemOpIdx oi, TCGType ext)
 {
     MemOp memop = get_memop(oi);
-    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+    AArch64LdstExt option = ldst_ext_option();
 
     /* Byte swapping is left to middle-end expansion. */
     tcg_debug_assert((memop & MO_BSWAP) == 0);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
 
     tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
     tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
-                           TCG_REG_X1, otype, addr_reg);
+                           TCG_REG_X1, option, addr_reg);
     add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
                         s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     }
     if (USE_GUEST_BASE) {
         tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
-                               TCG_REG_GUEST_BASE, otype, addr_reg);
+                               TCG_REG_GUEST_BASE, option, addr_reg);
     } else {
+        /* This case is always a 64-bit guest with no extension. */
         tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
-                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
+                               addr_reg, LDST_EXT_UXTX, TCG_REG_XZR);
     }
 #endif /* CONFIG_SOFTMMU */
 }
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                             MemOpIdx oi)
 {
     MemOp memop = get_memop(oi);
-    const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+    AArch64LdstExt option = ldst_ext_option();
 
     /* Byte swapping is left to middle-end expansion. */
     tcg_debug_assert((memop & MO_BSWAP) == 0);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
 
     tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
     tcg_out_qemu_st_direct(s, memop, data_reg,
-                           TCG_REG_X1, otype, addr_reg);
+                           TCG_REG_X1, option, addr_reg);
     add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE) == MO_64,
                         data_reg, addr_reg, s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
     }
     if (USE_GUEST_BASE) {
         tcg_out_qemu_st_direct(s, memop, data_reg,
-                               TCG_REG_GUEST_BASE, otype, addr_reg);
+                               TCG_REG_GUEST_BASE, option, addr_reg);
     } else {
+        /* This case is always a 64-bit guest with no extension. */
         tcg_out_qemu_st_direct(s, memop, data_reg,
-                               addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
+                               addr_reg, LDST_EXT_UXTX, TCG_REG_XZR);
     }
 #endif /* CONFIG_SOFTMMU */
 }
--
2.25.1
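The AArch64 commit message above relies on the load/store register addressing mode being able to either sign- or zero-extend a 32-bit index. As an informal C sketch (not part of the patch; the helper names are made up), the three option values used by the patch correspond to the following host address arithmetic:

    #include <stdint.h>

    /* UXTW: treat the 32-bit guest address as unsigned. */
    static uint64_t addr_uxtw(uint64_t base, uint32_t gaddr)
    {
        return base + gaddr;
    }

    /* SXTW: treat the 32-bit guest address as signed (TCG_TARGET_SIGNED_ADDR32). */
    static uint64_t addr_sxtw(uint64_t base, uint32_t gaddr)
    {
        return base + (uint64_t)(int64_t)(int32_t)gaddr;
    }

    /* UXTX: a 64-bit guest address, used as-is. */
    static uint64_t addr_uxtx(uint64_t base, uint64_t gaddr)
    {
        return base + gaddr;
    }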
Do not call get_clock_realtime() in tlb_mmu_resize_locked,
but hoist outside of any loop over a set of tlbs.  This is
only two (indirect) callers, tlb_flush_by_mmuidx_async_work
and tlb_flush_page_locked, so not onerous.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
  * high), since otherwise we are likely to have a significant amount of
  * conflict misses.
  */
-static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
+static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
+                                  int64_t now)
 {
     size_t old_size = tlb_n_entries(fast);
     size_t rate;
     size_t new_size = old_size;
-    int64_t now = get_clock_realtime();
     int64_t window_len_ms = 100;
     int64_t window_len_ns = window_len_ms * 1000 * 1000;
     bool window_expired = now > desc->window_begin_ns + window_len_ns;
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
     memset(desc->vtable, -1, sizeof(desc->vtable));
 }
 
-static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
+static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx,
+                                        int64_t now)
 {
     CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
     CPUTLBDescFast *fast = &env_tlb(env)->f[mmu_idx];
 
-    tlb_mmu_resize_locked(desc, fast);
+    tlb_mmu_resize_locked(desc, fast, now);
     tlb_mmu_flush_locked(desc, fast);
 }
 
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
     CPUArchState *env = cpu->env_ptr;
     uint16_t asked = data.host_int;
     uint16_t all_dirty, work, to_clean;
+    int64_t now = get_clock_realtime();
 
     assert_cpu_is_self(cpu);
 
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
 
     for (work = to_clean; work != 0; work &= work - 1) {
         int mmu_idx = ctz32(work);
-        tlb_flush_one_mmuidx_locked(env, mmu_idx);
+        tlb_flush_one_mmuidx_locked(env, mmu_idx, now);
     }
 
     qemu_spin_unlock(&env_tlb(env)->c.lock);
@@ -XXX,XX +XXX,XX @@ static void tlb_flush_page_locked(CPUArchState *env, int midx,
         tlb_debug("forcing full flush midx %d ("
                   TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
                   midx, lp_addr, lp_mask);
-        tlb_flush_one_mmuidx_locked(env, midx);
+        tlb_flush_one_mmuidx_locked(env, midx, get_clock_realtime());
     } else {
         if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
             tlb_n_used_entries_dec(env, midx);
--
2.20.1

All 32-bit mips operations sign-extend the output, so we are easily
able to keep TCG_TYPE_I32 values sign-extended in host registers.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/mips/tcg-target-sa32.h |  8 ++++++++
 tcg/mips/tcg-target.c.inc  | 10 ++--------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/tcg/mips/tcg-target-sa32.h b/tcg/mips/tcg-target-sa32.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target-sa32.h
+++ b/tcg/mips/tcg-target-sa32.h
@@ -1 +1,9 @@
+/*
+ * Do not set TCG_TARGET_SIGNED_ADDR32 for mips32;
+ * TCG expects this to only be set for 64-bit hosts.
+ */
+#ifdef __mips64
+#define TCG_TARGET_SIGNED_ADDR32 1
+#else
 #define TCG_TARGET_SIGNED_ADDR32 0
+#endif
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
                      TCG_TMP0, TCG_TMP3, cmp_off);
     }
 
-    /* Zero extend a 32-bit guest address for a 64-bit host. */
-    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
-        tcg_out_ext32u(s, base, addrl);
-        addrl = base;
-    }
-
     /*
      * Mask the page bits, keeping the alignment bits to compare against.
      * For unaligned accesses, compare against the end of the access to
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
                             data_regl, data_regh, addr_regl, addr_regh,
                             s->code_ptr, label_ptr);
 #else
-    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS && !guest_base_signed_addr32) {
         tcg_out_ext32u(s, base, addr_regl);
         addr_regl = base;
     }
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
                             data_regl, data_regh, addr_regl, addr_regh,
                             s->code_ptr, label_ptr);
 #else
-    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS && !guest_base_signed_addr32) {
         tcg_out_ext32u(s, base, addr_regl);
         addr_regl = base;
     }
--
2.25.1
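The MIPS patch above and the RISC-V patch below lean on the same invariant: on those 64-bit hosts, 32-bit results are kept sign-extended in their 64-bit registers, so once 32-bit guest addresses are defined to be signed there is nothing left to extend before adding the TLB addend or guest base. A rough, standalone sketch of the address step (not QEMU code; only guest_base_signed_addr32 is taken from the patches, the rest is made up):

    #include <stdint.h>

    /*
     * addr_reg models a host register holding a 32-bit guest address,
     * already sign-extended by the host's 32-bit arithmetic.
     */
    static uint64_t guest_addr_for_add(int64_t addr_reg, int signed_addr32)
    {
        if (!signed_addr32) {
            /* Unsigned guest addresses: explicit zero-extension, as before. */
            return (uint32_t)addr_reg;
        }
        /* Signed guest addresses: the register can be used as-is. */
        return (uint64_t)addr_reg;
    }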
There's little point in leaving these data structures half initialized,
and relying on a flush to be done during reset.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cputlb.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index XXXXXXX..XXXXXXX 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -XXX,XX +XXX,XX @@ static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
     fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
     fast->table = g_new(CPUTLBEntry, n_entries);
     desc->iotlb = g_new(CPUIOTLBEntry, n_entries);
+    tlb_mmu_flush_locked(desc, fast);
 }
 
 static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
@@ -XXX,XX +XXX,XX @@ void tlb_init(CPUState *cpu)
 
     qemu_spin_init(&env_tlb(env)->c.lock);
 
-    /* Ensure that cpu_reset performs a full flush.  */
-    env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;
+    /* All tlbs are initialized flushed. */
+    env_tlb(env)->c.dirty = 0;
 
     for (i = 0; i < NB_MMU_MODES; i++) {
         tlb_mmu_init(&env_tlb(env)->d[i], &env_tlb(env)->f[i], now);
--
2.20.1

All RV64 32-bit operations sign-extend the output, so we are easily
able to keep TCG_TYPE_I32 values sign-extended in host registers.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/riscv/tcg-target-sa32.h | 6 +++++-
 tcg/riscv/tcg-target.c.inc  | 8 ++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tcg/riscv/tcg-target-sa32.h b/tcg/riscv/tcg-target-sa32.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target-sa32.h
+++ b/tcg/riscv/tcg-target-sa32.h
@@ -1 +1,5 @@
-#define TCG_TARGET_SIGNED_ADDR32 0
+/*
+ * Do not set TCG_TARGET_SIGNED_ADDR32 for RV32;
+ * TCG expects this to only be set for 64-bit hosts.
+ */
+#define TCG_TARGET_SIGNED_ADDR32 (__riscv_xlen == 64)
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl,
     tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
 
     /* TLB Hit - translate address using addend.  */
-    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
-        tcg_out_ext32u(s, TCG_REG_TMP0, addrl);
-        addrl = TCG_REG_TMP0;
-    }
     tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
 }
 
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
                 data_regl, data_regh, addr_regl, addr_regh,
                 s->code_ptr, label_ptr);
 #else
-    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS && !guest_base_signed_addr32) {
         tcg_out_ext32u(s, base, addr_regl);
         addr_regl = base;
     }
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
                 data_regl, data_regh, addr_regl, addr_regh,
                 s->code_ptr, label_ptr);
 #else
-    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+    if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS && !guest_base_signed_addr32) {
         tcg_out_ext32u(s, base, addr_regl);
         addr_regl = base;
     }
--
2.25.1
From: Carlos Santos <casantos@redhat.com>

uClibc defines _SC_LEVEL1_ICACHE_LINESIZE and _SC_LEVEL1_DCACHE_LINESIZE
but the corresponding sysconf calls returns -1, which is a valid result,
meaning that the limit is indeterminate.

Handle this situation using the fallback values instead of crashing due
to an assertion failure.

Signed-off-by: Carlos Santos <casantos@redhat.com>
Message-Id: <20191017123713.30192-1-casantos@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 util/cacheinfo.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/util/cacheinfo.c b/util/cacheinfo.c
index XXXXXXX..XXXXXXX 100644
--- a/util/cacheinfo.c
+++ b/util/cacheinfo.c
@@ -XXX,XX +XXX,XX @@ static void sys_cache_info(int *isize, int *dsize)
 static void sys_cache_info(int *isize, int *dsize)
 {
 # ifdef _SC_LEVEL1_ICACHE_LINESIZE
-    *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    int tmp_isize = (int) sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+    if (tmp_isize > 0) {
+        *isize = tmp_isize;
+    }
 # endif
 # ifdef _SC_LEVEL1_DCACHE_LINESIZE
-    *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    int tmp_dsize = (int) sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+    if (tmp_dsize > 0) {
+        *dsize = tmp_dsize;
+    }
 # endif
 }
 #endif /* sys_cache_info */
--
2.20.1

All 32-bit LoongArch operations sign-extend the output, so we are easily
able to keep TCG_TYPE_I32 values sign-extended in host registers.

Cc: WANG Xuerui <git@xen0n.name>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/loongarch64/tcg-target-sa32.h |  2 +-
 tcg/loongarch64/tcg-target.c.inc  | 15 ++++++---------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/tcg/loongarch64/tcg-target-sa32.h b/tcg/loongarch64/tcg-target-sa32.h
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target-sa32.h
+++ b/tcg/loongarch64/tcg-target-sa32.h
@@ -1 +1 @@
-#define TCG_TARGET_SIGNED_ADDR32 0
+#define TCG_TARGET_SIGNED_ADDR32 1
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index XXXXXXX..XXXXXXX 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
     return tcg_out_fail_alignment(s, l);
 }
 
-#endif /* CONFIG_SOFTMMU */
-
 /*
  * `ext32u` the address register into the temp register given,
  * if target is 32-bit, no-op otherwise.
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s,
                                           TCGReg addr, TCGReg tmp)
 {
-    if (TARGET_LONG_BITS == 32) {
+    if (TARGET_LONG_BITS == 32 && !guest_base_signed_addr32) {
         tcg_out_ext32u(s, tmp, addr);
         return tmp;
     }
     return addr;
 }
+#endif /* CONFIG_SOFTMMU */
 
 static void tcg_out_qemu_ld_indexed(TCGContext *s, TCGReg rd, TCGReg rj,
                                     TCGReg rk, MemOp opc, TCGType type)
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type)
     tcg_insn_unit *label_ptr[1];
 #else
     unsigned a_bits;
-#endif
     TCGReg base;
+#endif
 
     data_regl = *args++;
     addr_regl = *args++;
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type)
 
 #if defined(CONFIG_SOFTMMU)
     tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 1);
-    base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
-    tcg_out_qemu_ld_indexed(s, data_regl, base, TCG_REG_TMP2, opc, type);
+    tcg_out_qemu_ld_indexed(s, data_regl, addr_regl, TCG_REG_TMP2, opc, type);
     add_qemu_ldst_label(s, 1, oi, type,
                         data_regl, addr_regl,
                         s->code_ptr, label_ptr);
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
     tcg_insn_unit *label_ptr[1];
 #else
     unsigned a_bits;
-#endif
     TCGReg base;
+#endif
 
     data_regl = *args++;
     addr_regl = *args++;
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
 
 #if defined(CONFIG_SOFTMMU)
     tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 0);
-    base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
-    tcg_out_qemu_st_indexed(s, data_regl, base, TCG_REG_TMP2, opc);
+    tcg_out_qemu_st_indexed(s, data_regl, addr_regl, TCG_REG_TMP2, opc);
     add_qemu_ldst_label(s, 0, oi,
                         0, /* type param is unused for stores */
                         data_regl, addr_regl,
--
2.25.1
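The util/cacheinfo.c fix above follows a common defensive pattern: a sysconf() result of -1 is valid and only means the limit is indeterminate, so the caller keeps its precomputed fallback instead of asserting. A minimal standalone version of that pattern (illustrative only; the function name is made up):

    #include <unistd.h>

    /* dsize arrives preset to a sane fallback and is only overwritten
     * when sysconf() can actually report a cache line size. */
    static void probe_dcache_linesize(int *dsize)
    {
    #ifdef _SC_LEVEL1_DCACHE_LINESIZE
        long v = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
        if (v > 0) {
            *dsize = (int)v;
        }
    #endif
    }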