Changes from v1:
  * Patch 10 is new, avoiding an overflow in probe_guest_base,
    visible with aarch64 host, --static --disable-pie, exposed
    by the placement of the host binary in the address space.

r~

Emilio Cota (2):
      util: import GTree as QTree
      tcg: use QTree instead of GTree

Richard Henderson (10):
      linux-user: Diagnose misaligned -R size
      accel/tcg: Pass last not end to page_set_flags
      accel/tcg: Pass last not end to page_reset_target_data
      accel/tcg: Pass last not end to PAGE_FOR_EACH_TB
      accel/tcg: Pass last not end to page_collection_lock
      accel/tcg: Pass last not end to tb_invalidate_phys_page_range__locked
      accel/tcg: Pass last not end to tb_invalidate_phys_range
      linux-user: Pass last not end to probe_guest_base
      include/exec: Change reserved_va semantics to last byte
      linux-user/arm: Take more care allocating commpage

 configure                   |   15 +
 meson.build                 |    4 +
 include/exec/cpu-all.h      |   15 +-
 include/exec/exec-all.h     |    2 +-
 include/qemu/qtree.h        |  201 +++
 linux-user/arm/target_cpu.h |    2 +-
 accel/tcg/tb-maint.c        |  112 +--
 accel/tcg/translate-all.c   |    2 +-
 accel/tcg/user-exec.c       |   25 +-
 bsd-user/main.c             |   10 +-
 bsd-user/mmap.c             |   10 +-
 linux-user/elfload.c        |   72 +-
 linux-user/flatload.c       |    2 +-
 linux-user/main.c           |   31 +-
 linux-user/mmap.c           |   22 +-
 linux-user/syscall.c        |    4 +-
 softmmu/physmem.c           |    2 +-
 tcg/region.c                |   19 +-
 tests/bench/qtree-bench.c   |  286 +++
 tests/unit/test-qtree.c     |  333 +++
 util/qtree.c                | 1390 ++++++++++++++++++++++++++++++++++++
 tests/bench/meson.build     |    4 +
 tests/unit/meson.build      |    1 +
 util/meson.build            |    1 +
 24 files changed, 2415 insertions(+), 150 deletions(-)
 create mode 100644 include/qemu/qtree.h
 create mode 100644 tests/bench/qtree-bench.c
 create mode 100644 tests/unit/test-qtree.c
 create mode 100644 util/qtree.c

--
2.34.1

The following changes since commit c52d69e7dbaaed0ffdef8125e79218672c30161d:

  Merge remote-tracking branch 'remotes/cschoenebeck/tags/pull-9p-20211027' into staging (2021-10-27 11:45:18 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20211027

for you to fetch changes up to 820c025f0dcacf2f3c12735b1f162893fbfa7bc6:

  tcg/optimize: Propagate sign info for shifting (2021-10-27 17:11:23 -0700)

----------------------------------------------------------------
Improvements to qemu/int128
Fixes for 128/64 division.
Cleanup tcg/optimize.c
Optimize redundant sign extensions

----------------------------------------------------------------
Frédéric Pétrot (1):
      qemu/int128: Add int128_{not,xor}

Luis Pires (4):
      host-utils: move checks out of divu128/divs128
      host-utils: move udiv_qrnnd() to host-utils
      host-utils: add 128-bit quotient support to divu128/divs128
      host-utils: add unit tests for divu128/divs128

Richard Henderson (51):
      tcg/optimize: Rename "mask" to "z_mask"
      tcg/optimize: Split out OptContext
      tcg/optimize: Remove do_default label
      tcg/optimize: Change tcg_opt_gen_{mov,movi} interface
      tcg/optimize: Move prev_mb into OptContext
      tcg/optimize: Split out init_arguments
      tcg/optimize: Split out copy_propagate
      tcg/optimize: Split out fold_call
      tcg/optimize: Drop nb_oargs, nb_iargs locals
      tcg/optimize: Change fail return for do_constant_folding_cond*
      tcg/optimize: Return true from tcg_opt_gen_{mov,movi}
      tcg/optimize: Split out finish_folding
      tcg/optimize: Use a boolean to avoid a mass of continues
      tcg/optimize: Split out fold_mb, fold_qemu_{ld,st}
      tcg/optimize: Split out fold_const{1,2}
      tcg/optimize: Split out fold_setcond2
      tcg/optimize: Split out fold_brcond2
      tcg/optimize: Split out fold_brcond
      tcg/optimize: Split out fold_setcond
      tcg/optimize: Split out fold_mulu2_i32
      tcg/optimize: Split out fold_addsub2_i32
      tcg/optimize: Split out fold_movcond
      tcg/optimize: Split out fold_extract2
      tcg/optimize: Split out fold_extract, fold_sextract
      tcg/optimize: Split out fold_deposit
      tcg/optimize: Split out fold_count_zeros
      tcg/optimize: Split out fold_bswap
      tcg/optimize: Split out fold_dup, fold_dup2
      tcg/optimize: Split out fold_mov
      tcg/optimize: Split out fold_xx_to_i
      tcg/optimize: Split out fold_xx_to_x
      tcg/optimize: Split out fold_xi_to_i
      tcg/optimize: Add type to OptContext
      tcg/optimize: Split out fold_to_not
      tcg/optimize: Split out fold_sub_to_neg
      tcg/optimize: Split out fold_xi_to_x
      tcg/optimize: Split out fold_ix_to_i
      tcg/optimize: Split out fold_masks
      tcg/optimize: Expand fold_mulu2_i32 to all 4-arg multiplies
      tcg/optimize: Expand fold_addsub2_i32 to 64-bit ops
      tcg/optimize: Sink commutative operand swapping into fold functions
      tcg/optimize: Stop forcing z_mask to "garbage" for 32-bit values
      tcg/optimize: Use fold_xx_to_i for orc
      tcg/optimize: Use fold_xi_to_x for mul
      tcg/optimize: Use fold_xi_to_x for div
      tcg/optimize: Use fold_xx_to_i for rem
      tcg/optimize: Optimize sign extensions
      tcg/optimize: Propagate sign info for logical operations
      tcg/optimize: Propagate sign info for setcond
      tcg/optimize: Propagate sign info for bit counting
      tcg/optimize: Propagate sign info for shifting

 include/fpu/softfloat-macros.h |   82 --
 include/hw/clock.h             |    5 +-
 include/qemu/host-utils.h      |  121 +-
 include/qemu/int128.h          |   20 +
 target/ppc/int_helper.c        |   23 +-
 tcg/optimize.c                 | 2644 ++++++++++++++++++++++++----------------
 tests/unit/test-div128.c       |  197 +++
 util/host-utils.c              |  147 ++-
 tests/unit/meson.build         |    1 +
 9 files changed, 2053 insertions(+), 1187 deletions(-)
 create mode 100644 tests/unit/test-div128.c
Deleted patch
From: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>

Addition of not and xor on 128-bit integers.

Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
Message-Id: <20211025122818.168890-3-frederic.petrot@univ-grenoble-alpes.fr>
[rth: Split out logical operations.]
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/qemu/int128.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
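
As a usage sketch (illustrative only, not taken from the patch; the helper
names below are made up), the new helpers compose with the existing Int128
operations on either the CONFIG_INT128 or the fallback path:

    /* Illustrative sketch only: combine the new helpers; works identically
     * whether Int128 is a native __int128 or the two-member struct. */
    #include "qemu/int128.h"

    static Int128 int128_bic(Int128 value, Int128 mask)
    {
        /* value & ~mask, i.e. "bit clear", built from int128_not/int128_and */
        return int128_and(value, int128_not(mask));
    }

    static Int128 int128_toggle(Int128 value, Int128 bits)
    {
        /* flip the selected bits with the new int128_xor */
        return int128_xor(value, bits);
    }
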
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/qemu/int128.h
18
+++ b/include/qemu/int128.h
19
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
20
return a;
21
}
22
23
+static inline Int128 int128_not(Int128 a)
24
+{
25
+ return ~a;
26
+}
27
+
28
static inline Int128 int128_and(Int128 a, Int128 b)
29
{
30
return a & b;
31
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
32
return a | b;
33
}
34
35
+static inline Int128 int128_xor(Int128 a, Int128 b)
36
+{
37
+ return a ^ b;
38
+}
39
+
40
static inline Int128 int128_rshift(Int128 a, int n)
41
{
42
return a >> n;
43
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
44
return int128_make128(a, (a < 0) ? -1 : 0);
45
}
46
47
+static inline Int128 int128_not(Int128 a)
48
+{
49
+ return int128_make128(~a.lo, ~a.hi);
50
+}
51
+
52
static inline Int128 int128_and(Int128 a, Int128 b)
53
{
54
return int128_make128(a.lo & b.lo, a.hi & b.hi);
55
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
56
return int128_make128(a.lo | b.lo, a.hi | b.hi);
57
}
58
59
+static inline Int128 int128_xor(Int128 a, Int128 b)
60
+{
61
+ return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi);
62
+}
63
+
64
static inline Int128 int128_rshift(Int128 a, int n)
65
{
66
int64_t h;
67
--
68
2.25.1
Deleted patch
From: Luis Pires <luis.pires@eldorado.org.br>

In preparation for changing the divu128/divs128 implementations
to allow for quotients larger than 64 bits, move the div-by-zero
and overflow checks to the callers.

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-2-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/hw/clock.h        |  5 +++--
 include/qemu/host-utils.h | 34 ++++++++++++---------------------
 target/ppc/int_helper.c   | 14 +++++++++-----
 util/host-utils.c         | 40 ++++++++++++++++++---------------------
 4 files changed, 42 insertions(+), 51 deletions(-)
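
A minimal sketch of the resulting caller-side pattern (illustrative only;
the helper name is made up, and the checks mirror the int_helper.c hunk
below): the caller now screens for division by zero and for a quotient
that would not fit in 64 bits, since divu128() no longer reports either.

    #include "qemu/osdep.h"
    #include "qemu/host-utils.h"

    /*
     * Illustrative only: divide the 128-bit value hi:lo by 'divisor',
     * reporting overflow the way the PPC divdeu helper now does.
     */
    static uint64_t div128to64_checked(uint64_t lo, uint64_t hi,
                                       uint64_t divisor, bool *overflow)
    {
        if (divisor == 0 || hi >= divisor) {
            /* division by zero, or quotient would not fit in 64 bits */
            *overflow = true;
            return 0;
        }
        *overflow = false;
        divu128(&lo, &hi, divisor);   /* quotient in lo, remainder in hi */
        return lo;
    }
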
diff --git a/include/hw/clock.h b/include/hw/clock.h
19
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/clock.h
21
+++ b/include/hw/clock.h
22
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
23
return 0;
24
}
25
/*
26
- * Ignore divu128() return value as we've caught div-by-zero and don't
27
- * need different behaviour for overflow.
28
+ * BUG: when CONFIG_INT128 is not defined, the current implementation of
29
+ * divu128 does not return a valid truncated quotient, so the result will
30
+ * be wrong.
31
*/
32
divu128(&lo, &hi, clk->period);
33
return lo;
34
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/include/qemu/host-utils.h
37
+++ b/include/qemu/host-utils.h
38
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
39
return (__int128_t)a * b / c;
40
}
41
42
-static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
43
+static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
44
{
45
- if (divisor == 0) {
46
- return 1;
47
- } else {
48
- __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
49
- __uint128_t result = dividend / divisor;
50
- *plow = result;
51
- *phigh = dividend % divisor;
52
- return result > UINT64_MAX;
53
- }
54
+ __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
55
+ __uint128_t result = dividend / divisor;
56
+ *plow = result;
57
+ *phigh = dividend % divisor;
58
}
59
60
-static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
61
+static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
62
{
63
- if (divisor == 0) {
64
- return 1;
65
- } else {
66
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
67
- __int128_t result = dividend / divisor;
68
- *plow = result;
69
- *phigh = dividend % divisor;
70
- return result != *plow;
71
- }
72
+ __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
73
+ __int128_t result = dividend / divisor;
74
+ *plow = result;
75
+ *phigh = dividend % divisor;
76
}
77
#else
78
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
79
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
80
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
81
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
82
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
83
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
84
85
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
86
{
87
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/target/ppc/int_helper.c
90
+++ b/target/ppc/int_helper.c
91
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
92
uint64_t rt = 0;
93
int overflow = 0;
94
95
- overflow = divu128(&rt, &ra, rb);
96
-
97
- if (unlikely(overflow)) {
98
+ if (unlikely(rb == 0 || ra >= rb)) {
99
+ overflow = 1;
100
rt = 0; /* Undefined */
101
+ } else {
102
+ divu128(&rt, &ra, rb);
103
}
104
105
if (oe) {
106
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
107
int64_t rt = 0;
108
int64_t ra = (int64_t)rau;
109
int64_t rb = (int64_t)rbu;
110
- int overflow = divs128(&rt, &ra, rb);
111
+ int overflow = 0;
112
113
- if (unlikely(overflow)) {
114
+ if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
115
+ overflow = 1;
116
rt = 0; /* Undefined */
117
+ } else {
118
+ divs128(&rt, &ra, rb);
119
}
120
121
if (oe) {
122
diff --git a/util/host-utils.c b/util/host-utils.c
123
index XXXXXXX..XXXXXXX 100644
124
--- a/util/host-utils.c
125
+++ b/util/host-utils.c
126
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
127
*phigh = rh;
128
}
129
130
-/* Unsigned 128x64 division. Returns 1 if overflow (divide by zero or */
131
-/* quotient exceeds 64 bits). Otherwise returns quotient via plow and */
132
-/* remainder via phigh. */
133
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
134
+/*
135
+ * Unsigned 128-by-64 division. Returns quotient via plow and
136
+ * remainder via phigh.
137
+ * The result must fit in 64 bits (plow) - otherwise, the result
138
+ * is undefined.
139
+ * This function will cause a division by zero if passed a zero divisor.
140
+ */
141
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
142
{
143
uint64_t dhi = *phigh;
144
uint64_t dlo = *plow;
145
unsigned i;
146
uint64_t carry = 0;
147
148
- if (divisor == 0) {
149
- return 1;
150
- } else if (dhi == 0) {
151
+ if (divisor == 0 || dhi == 0) {
152
*plow = dlo / divisor;
153
*phigh = dlo % divisor;
154
- return 0;
155
- } else if (dhi >= divisor) {
156
- return 1;
157
} else {
158
159
for (i = 0; i < 64; i++) {
160
@@ -XXX,XX +XXX,XX @@ int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
161
162
*plow = dlo;
163
*phigh = dhi;
164
- return 0;
165
}
166
}
167
168
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
169
+/*
170
+ * Signed 128-by-64 division. Returns quotient via plow and
171
+ * remainder via phigh.
172
+ * The result must fit in 64 bits (plow) - otherwise, the result
173
+ * is undefined.
174
+ * This function will cause a division by zero if passed a zero divisor.
175
+ */
176
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
177
{
178
int sgn_dvdnd = *phigh < 0;
179
int sgn_divsr = divisor < 0;
180
- int overflow = 0;
181
182
if (sgn_dvdnd) {
183
*plow = ~(*plow);
184
@@ -XXX,XX +XXX,XX @@ int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
185
divisor = 0 - divisor;
186
}
187
188
- overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
189
+ divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
190
191
if (sgn_dvdnd ^ sgn_divsr) {
192
*plow = 0 - *plow;
193
}
194
-
195
- if (!overflow) {
196
- if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
197
- overflow = 1;
198
- }
199
- }
200
-
201
- return overflow;
202
}
203
#endif
204
205
--
206
2.25.1
Deleted patch
From: Luis Pires <luis.pires@eldorado.org.br>

Move udiv_qrnnd() from include/fpu/softfloat-macros.h to host-utils,
so it can be reused by divu128().

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-3-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/fpu/softfloat-macros.h | 82 ----------------------------------
 include/qemu/host-utils.h      | 81 +++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 82 deletions(-)
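
For orientation, a usage sketch of the moved routine (illustrative only,
not part of the patch): udiv_qrnnd() divides the two-word value n1:n0 by d,
returning the quotient and storing the remainder. As with GMP's
__udiv_qrnnd, the caller must keep n1 < d, and the portable C fallback
additionally expects d to be normalized (most significant bit set), which
is what the next patch arranges before calling it.

    #include "qemu/osdep.h"
    #include "qemu/host-utils.h"

    /* Illustrative only: compute (n1:n0) / d with udiv_qrnnd(). */
    static uint64_t example_udiv(uint64_t n1, uint64_t n0, uint64_t d,
                                 uint64_t *rem)
    {
        /* Preconditions: n1 < d; d normalized for the generic fallback. */
        return udiv_qrnnd(rem, n1, n0, d);   /* n1:n0 == q * d + *rem */
    }
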
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/fpu/softfloat-macros.h
18
+++ b/include/fpu/softfloat-macros.h
19
@@ -XXX,XX +XXX,XX @@
20
* so some portions are provided under:
21
* the SoftFloat-2a license
22
* the BSD license
23
- * GPL-v2-or-later
24
*
25
* Any future contributions to this file after December 1st 2014 will be
26
* taken to be licensed under the Softfloat-2a license unless specifically
27
@@ -XXX,XX +XXX,XX @@ this code that are retained.
28
* THE POSSIBILITY OF SUCH DAMAGE.
29
*/
30
31
-/* Portions of this work are licensed under the terms of the GNU GPL,
32
- * version 2 or later. See the COPYING file in the top-level directory.
33
- */
34
-
35
#ifndef FPU_SOFTFLOAT_MACROS_H
36
#define FPU_SOFTFLOAT_MACROS_H
37
38
@@ -XXX,XX +XXX,XX @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
39
40
}
41
42
-/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
43
- * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
44
- *
45
- * Licensed under the GPLv2/LGPLv3
46
- */
47
-static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
48
- uint64_t n0, uint64_t d)
49
-{
50
-#if defined(__x86_64__)
51
- uint64_t q;
52
- asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
53
- return q;
54
-#elif defined(__s390x__) && !defined(__clang__)
55
- /* Need to use a TImode type to get an even register pair for DLGR. */
56
- unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
57
- asm("dlgr %0, %1" : "+r"(n) : "r"(d));
58
- *r = n >> 64;
59
- return n;
60
-#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
61
- /* From Power ISA 2.06, programming note for divdeu. */
62
- uint64_t q1, q2, Q, r1, r2, R;
63
- asm("divdeu %0,%2,%4; divdu %1,%3,%4"
64
- : "=&r"(q1), "=r"(q2)
65
- : "r"(n1), "r"(n0), "r"(d));
66
- r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
67
- r2 = n0 - (q2 * d);
68
- Q = q1 + q2;
69
- R = r1 + r2;
70
- if (R >= d || R < r2) { /* overflow implies R > d */
71
- Q += 1;
72
- R -= d;
73
- }
74
- *r = R;
75
- return Q;
76
-#else
77
- uint64_t d0, d1, q0, q1, r1, r0, m;
78
-
79
- d0 = (uint32_t)d;
80
- d1 = d >> 32;
81
-
82
- r1 = n1 % d1;
83
- q1 = n1 / d1;
84
- m = q1 * d0;
85
- r1 = (r1 << 32) | (n0 >> 32);
86
- if (r1 < m) {
87
- q1 -= 1;
88
- r1 += d;
89
- if (r1 >= d) {
90
- if (r1 < m) {
91
- q1 -= 1;
92
- r1 += d;
93
- }
94
- }
95
- }
96
- r1 -= m;
97
-
98
- r0 = r1 % d1;
99
- q0 = r1 / d1;
100
- m = q0 * d0;
101
- r0 = (r0 << 32) | (uint32_t)n0;
102
- if (r0 < m) {
103
- q0 -= 1;
104
- r0 += d;
105
- if (r0 >= d) {
106
- if (r0 < m) {
107
- q0 -= 1;
108
- r0 += d;
109
- }
110
- }
111
- }
112
- r0 -= m;
113
-
114
- *r = r0;
115
- return (q1 << 32) | q0;
116
-#endif
117
-}
118
-
119
/*----------------------------------------------------------------------------
120
| Returns an approximation to the square root of the 32-bit significand given
121
| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
122
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
123
index XXXXXXX..XXXXXXX 100644
124
--- a/include/qemu/host-utils.h
125
+++ b/include/qemu/host-utils.h
126
@@ -XXX,XX +XXX,XX @@
127
* THE SOFTWARE.
128
*/
129
130
+/* Portions of this work are licensed under the terms of the GNU GPL,
131
+ * version 2 or later. See the COPYING file in the top-level directory.
132
+ */
133
+
134
#ifndef HOST_UTILS_H
135
#define HOST_UTILS_H
136
137
@@ -XXX,XX +XXX,XX @@ void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
138
*/
139
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
140
141
+/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
142
+ * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
143
+ *
144
+ * Licensed under the GPLv2/LGPLv3
145
+ */
146
+static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
147
+ uint64_t n0, uint64_t d)
148
+{
149
+#if defined(__x86_64__)
150
+ uint64_t q;
151
+ asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
152
+ return q;
153
+#elif defined(__s390x__) && !defined(__clang__)
154
+ /* Need to use a TImode type to get an even register pair for DLGR. */
155
+ unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
156
+ asm("dlgr %0, %1" : "+r"(n) : "r"(d));
157
+ *r = n >> 64;
158
+ return n;
159
+#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
160
+ /* From Power ISA 2.06, programming note for divdeu. */
161
+ uint64_t q1, q2, Q, r1, r2, R;
162
+ asm("divdeu %0,%2,%4; divdu %1,%3,%4"
163
+ : "=&r"(q1), "=r"(q2)
164
+ : "r"(n1), "r"(n0), "r"(d));
165
+ r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
166
+ r2 = n0 - (q2 * d);
167
+ Q = q1 + q2;
168
+ R = r1 + r2;
169
+ if (R >= d || R < r2) { /* overflow implies R > d */
170
+ Q += 1;
171
+ R -= d;
172
+ }
173
+ *r = R;
174
+ return Q;
175
+#else
176
+ uint64_t d0, d1, q0, q1, r1, r0, m;
177
+
178
+ d0 = (uint32_t)d;
179
+ d1 = d >> 32;
180
+
181
+ r1 = n1 % d1;
182
+ q1 = n1 / d1;
183
+ m = q1 * d0;
184
+ r1 = (r1 << 32) | (n0 >> 32);
185
+ if (r1 < m) {
186
+ q1 -= 1;
187
+ r1 += d;
188
+ if (r1 >= d) {
189
+ if (r1 < m) {
190
+ q1 -= 1;
191
+ r1 += d;
192
+ }
193
+ }
194
+ }
195
+ r1 -= m;
196
+
197
+ r0 = r1 % d1;
198
+ q0 = r1 / d1;
199
+ m = q0 * d0;
200
+ r0 = (r0 << 32) | (uint32_t)n0;
201
+ if (r0 < m) {
202
+ q0 -= 1;
203
+ r0 += d;
204
+ if (r0 >= d) {
205
+ if (r0 < m) {
206
+ q0 -= 1;
207
+ r0 += d;
208
+ }
209
+ }
210
+ }
211
+ r0 -= m;
212
+
213
+ *r = r0;
214
+ return (q1 << 32) | q0;
215
+#endif
216
+}
217
+
218
#endif
219
--
220
2.25.1
Deleted patch
From: Luis Pires <luis.pires@eldorado.org.br>

These will be used to implement new decimal floating point
instructions from Power ISA 3.1.

The remainder is now returned directly by divu128/divs128,
freeing up phigh to receive the high 64 bits of the quotient.

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-4-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/hw/clock.h        |   6 +-
 include/qemu/host-utils.h |  20 ++++--
 target/ppc/int_helper.c   |   9 +--
 util/host-utils.c         | 133 +++++++++++++++++++++++++-------------
 4 files changed, 108 insertions(+), 60 deletions(-)
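
A minimal usage sketch of the new interface (illustrative only; the
function name is made up): the dividend goes in through *plow/*phigh, the
full 128-bit quotient comes back the same way, and the remainder is the
return value.

    #include "qemu/osdep.h"
    #include "qemu/host-utils.h"

    /* Illustrative only: full 128-by-64 division with the new interface.
     * The divisor must be non-zero. */
    static void example_div128(uint64_t dividend_lo, uint64_t dividend_hi,
                               uint64_t divisor)
    {
        uint64_t lo = dividend_lo;
        uint64_t hi = dividend_hi;
        uint64_t rem;

        rem = divu128(&lo, &hi, divisor);
        /* quotient is now hi:lo (all 128 bits), remainder is rem */
        g_assert(rem < divisor);
    }
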
diff --git a/include/hw/clock.h b/include/hw/clock.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/clock.h
23
+++ b/include/hw/clock.h
24
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
25
if (clk->period == 0) {
26
return 0;
27
}
28
- /*
29
- * BUG: when CONFIG_INT128 is not defined, the current implementation of
30
- * divu128 does not return a valid truncated quotient, so the result will
31
- * be wrong.
32
- */
33
+
34
divu128(&lo, &hi, clk->period);
35
return lo;
36
}
37
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
38
index XXXXXXX..XXXXXXX 100644
39
--- a/include/qemu/host-utils.h
40
+++ b/include/qemu/host-utils.h
41
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
42
return (__int128_t)a * b / c;
43
}
44
45
-static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
46
+static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
47
+ uint64_t divisor)
48
{
49
__uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
50
__uint128_t result = dividend / divisor;
51
+
52
*plow = result;
53
- *phigh = dividend % divisor;
54
+ *phigh = result >> 64;
55
+ return dividend % divisor;
56
}
57
58
-static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
59
+static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
60
+ int64_t divisor)
61
{
62
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
63
+ __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
64
__int128_t result = dividend / divisor;
65
+
66
*plow = result;
67
- *phigh = dividend % divisor;
68
+ *phigh = result >> 64;
69
+ return dividend % divisor;
70
}
71
#else
72
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
73
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
74
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
75
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
76
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
77
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
78
79
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
80
{
81
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
82
index XXXXXXX..XXXXXXX 100644
83
--- a/target/ppc/int_helper.c
84
+++ b/target/ppc/int_helper.c
85
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
86
87
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
88
{
89
- int64_t rt = 0;
90
+ uint64_t rt = 0;
91
int64_t ra = (int64_t)rau;
92
int64_t rb = (int64_t)rbu;
93
int overflow = 0;
94
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
95
int cr;
96
uint64_t lo_value;
97
uint64_t hi_value;
98
+ uint64_t rem;
99
ppc_avr_t ret = { .u64 = { 0, 0 } };
100
101
if (b->VsrSD(0) < 0) {
102
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
103
* In that case, we leave r unchanged.
104
*/
105
} else {
106
- divu128(&lo_value, &hi_value, 1000000000000000ULL);
107
+ rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
108
109
- for (i = 1; i < 16; hi_value /= 10, i++) {
110
- bcd_put_digit(&ret, hi_value % 10, i);
111
+ for (i = 1; i < 16; rem /= 10, i++) {
112
+ bcd_put_digit(&ret, rem % 10, i);
113
}
114
115
for (; i < 32; lo_value /= 10, i++) {
116
diff --git a/util/host-utils.c b/util/host-utils.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/util/host-utils.c
119
+++ b/util/host-utils.c
120
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
121
}
122
123
/*
124
- * Unsigned 128-by-64 division. Returns quotient via plow and
125
- * remainder via phigh.
126
- * The result must fit in 64 bits (plow) - otherwise, the result
127
- * is undefined.
128
- * This function will cause a division by zero if passed a zero divisor.
129
+ * Unsigned 128-by-64 division.
130
+ * Returns the remainder.
131
+ * Returns quotient via plow and phigh.
132
+ * Also returns the remainder via the function return value.
133
*/
134
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
135
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
136
{
137
uint64_t dhi = *phigh;
138
uint64_t dlo = *plow;
139
- unsigned i;
140
- uint64_t carry = 0;
141
+ uint64_t rem, dhighest;
142
+ int sh;
143
144
if (divisor == 0 || dhi == 0) {
145
*plow = dlo / divisor;
146
- *phigh = dlo % divisor;
147
+ *phigh = 0;
148
+ return dlo % divisor;
149
} else {
150
+ sh = clz64(divisor);
151
152
- for (i = 0; i < 64; i++) {
153
- carry = dhi >> 63;
154
- dhi = (dhi << 1) | (dlo >> 63);
155
- if (carry || (dhi >= divisor)) {
156
- dhi -= divisor;
157
- carry = 1;
158
- } else {
159
- carry = 0;
160
+ if (dhi < divisor) {
161
+ if (sh != 0) {
162
+ /* normalize the divisor, shifting the dividend accordingly */
163
+ divisor <<= sh;
164
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
165
+ dlo <<= sh;
166
}
167
- dlo = (dlo << 1) | carry;
168
+
169
+ *phigh = 0;
170
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
171
+ } else {
172
+ if (sh != 0) {
173
+ /* normalize the divisor, shifting the dividend accordingly */
174
+ divisor <<= sh;
175
+ dhighest = dhi >> (64 - sh);
176
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
177
+ dlo <<= sh;
178
+
179
+ *phigh = udiv_qrnnd(&dhi, dhighest, dhi, divisor);
180
+ } else {
181
+ /**
182
+ * dhi >= divisor
183
+ * Since the MSB of divisor is set (sh == 0),
184
+ * (dhi - divisor) < divisor
185
+ *
186
+ * Thus, the high part of the quotient is 1, and we can
187
+ * calculate the low part with a single call to udiv_qrnnd
188
+ * after subtracting divisor from dhi
189
+ */
190
+ dhi -= divisor;
191
+ *phigh = 1;
192
+ }
193
+
194
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
195
}
196
197
- *plow = dlo;
198
- *phigh = dhi;
199
+ /*
200
+ * since the dividend/divisor might have been normalized,
201
+ * the remainder might also have to be shifted back
202
+ */
203
+ return rem >> sh;
204
}
205
}
206
207
/*
208
- * Signed 128-by-64 division. Returns quotient via plow and
209
- * remainder via phigh.
210
- * The result must fit in 64 bits (plow) - otherwise, the result
211
- * is undefined.
212
- * This function will cause a division by zero if passed a zero divisor.
213
+ * Signed 128-by-64 division.
214
+ * Returns quotient via plow and phigh.
215
+ * Also returns the remainder via the function return value.
216
*/
217
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
218
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor)
219
{
220
- int sgn_dvdnd = *phigh < 0;
221
- int sgn_divsr = divisor < 0;
222
+ bool neg_quotient = false, neg_remainder = false;
223
+ uint64_t unsig_hi = *phigh, unsig_lo = *plow;
224
+ uint64_t rem;
225
226
- if (sgn_dvdnd) {
227
- *plow = ~(*plow);
228
- *phigh = ~(*phigh);
229
- if (*plow == (int64_t)-1) {
230
+ if (*phigh < 0) {
231
+ neg_quotient = !neg_quotient;
232
+ neg_remainder = !neg_remainder;
233
+
234
+ if (unsig_lo == 0) {
235
+ unsig_hi = -unsig_hi;
236
+ } else {
237
+ unsig_hi = ~unsig_hi;
238
+ unsig_lo = -unsig_lo;
239
+ }
240
+ }
241
+
242
+ if (divisor < 0) {
243
+ neg_quotient = !neg_quotient;
244
+
245
+ divisor = -divisor;
246
+ }
247
+
248
+ rem = divu128(&unsig_lo, &unsig_hi, (uint64_t)divisor);
249
+
250
+ if (neg_quotient) {
251
+ if (unsig_lo == 0) {
252
+ *phigh = -unsig_hi;
253
*plow = 0;
254
- (*phigh)++;
255
- } else {
256
- (*plow)++;
257
- }
258
+ } else {
259
+ *phigh = ~unsig_hi;
260
+ *plow = -unsig_lo;
261
+ }
262
+ } else {
263
+ *phigh = unsig_hi;
264
+ *plow = unsig_lo;
265
}
266
267
- if (sgn_divsr) {
268
- divisor = 0 - divisor;
269
- }
270
-
271
- divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
272
-
273
- if (sgn_dvdnd ^ sgn_divsr) {
274
- *plow = 0 - *plow;
275
+ if (neg_remainder) {
276
+ return -rem;
277
+ } else {
278
+ return rem;
279
}
280
}
281
#endif
282
--
283
2.25.1
From: Luis Pires <luis.pires@eldorado.org.br>

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-5-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tests/unit/test-div128.c | 197 +++++++++++++++++++++++++++++++++
 tests/unit/meson.build   |   1 +
 2 files changed, 198 insertions(+)
 create mode 100644 tests/unit/test-div128.c

From: Emilio Cota <cota@braap.org>

The only reason to add this implementation is to control the memory allocator
used. Some users (e.g. TCG) cannot work reliably in multi-threaded
environments (e.g. forking in user-mode) with GTree's allocator, GSlice.
See https://gitlab.com/qemu-project/qemu/-/issues/285 for details.

Importing GTree is a temporary workaround until GTree migrates away
from GSlice.

This implementation is identical to that in glib v2.75.0, except that
we don't import recent additions to the API nor deprecated API calls,
none of which are used in QEMU.

I've imported tests from glib and added a benchmark just to
make sure that performance is similar. Note: it cannot be identical
because (1) we are not using GSlice, (2) we use different compilation flags
(e.g. -fPIC) and (3) we're linking statically.

$ cat /proc/cpuinfo| grep 'model name' | head -1
model name      : AMD Ryzen 7 PRO 5850U with Radeon Graphics
$ echo '0' | sudo tee /sys/devices/system/cpu/cpufreq/boost
$ tests/bench/qtree-bench

 Tree  Op              32            1024            4096          131072         1048576
------------------------------------------------------------------------------------------------
GTree  Lookup       83.23           43.08           25.31           19.40           16.22
QTree  Lookup      113.42 (1.36x)   53.83 (1.25x)   28.38 (1.12x)   17.64 (0.91x)   13.04 (0.80x)
GTree  Insert       44.23           29.37           25.83           19.49           17.03
QTree  Insert       46.87 (1.06x)   25.62 (0.87x)   24.29 (0.94x)   16.83 (0.86x)   12.97 (0.76x)
GTree  Remove       53.27           35.15           31.43           24.64           16.70
QTree  Remove       57.32 (1.08x)   41.76 (1.19x)   38.37 (1.22x)   29.30 (1.19x)   15.07 (0.90x)
GTree  RemoveAll   135.44          127.52          126.72          120.11           64.34
QTree  RemoveAll   127.15 (0.94x)  110.37 (0.87x)  107.97 (0.85x)   97.13 (0.81x)   55.10 (0.86x)
GTree  Traverse    277.71          276.09          272.78          246.72           98.47
QTree  Traverse    370.33 (1.33x)  411.97 (1.49x)  400.23 (1.47x)  262.82 (1.07x)   78.52 (0.80x)
------------------------------------------------------------------------------------------------

As a sanity check, the same benchmark when Glib's version
is >= $glib_dropped_gslice_version (i.e. QTree == GTree):

 Tree  Op              32            1024            4096          131072         1048576
------------------------------------------------------------------------------------------------
GTree  Lookup       82.72           43.09           24.18           19.73           16.09
QTree  Lookup       81.82 (0.99x)   43.10 (1.00x)   24.20 (1.00x)   19.76 (1.00x)   16.26 (1.01x)
GTree  Insert       45.07           29.62           26.34           19.90           17.18
QTree  Insert       45.72 (1.01x)   29.60 (1.00x)   26.38 (1.00x)   19.71 (0.99x)   17.20 (1.00x)
GTree  Remove       54.48           35.36           31.77           24.97           16.95
QTree  Remove       54.46 (1.00x)   35.32 (1.00x)   31.77 (1.00x)   24.91 (1.00x)   17.15 (1.01x)
GTree  RemoveAll   140.68          127.36          125.43          121.45           68.20
QTree  RemoveAll   140.65 (1.00x)  127.64 (1.00x)  125.01 (1.00x)  121.73 (1.00x)   67.06 (0.98x)
GTree  Traverse    278.68          276.05          266.75          251.65          104.93
QTree  Traverse    278.31 (1.00x)  275.78 (1.00x)  266.42 (1.00x)  247.89 (0.99x)  104.58 (1.00x)
------------------------------------------------------------------------------------------------

Signed-off-by: Emilio Cota <cota@braap.org>
Message-Id: <20230205163758.416992-2-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 configure                 |   15 +
 meson.build               |    4 +
 include/qemu/qtree.h      |  201 ++++
 tests/bench/qtree-bench.c |  286 ++++
 tests/unit/test-qtree.c   |  333 +++
 util/qtree.c              | 1390 +++++++++++++++++++++++++++++++++++++
 tests/bench/meson.build   |    4 +
 tests/unit/meson.build    |    1 +
 util/meson.build          |    1 +
 9 files changed, 2235 insertions(+)
 create mode 100644 include/qemu/qtree.h
 create mode 100644 tests/bench/qtree-bench.c
 create mode 100644 tests/unit/test-qtree.c
 create mode 100644 util/qtree.c
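
As a usage sketch of the API being introduced (illustrative only, not part
of the patch; QTree mirrors the GTree calls one-to-one, as the header below
shows):

    #include "qemu/osdep.h"
    #include "qemu/qtree.h"

    /* Usage sketch only: create, populate, query and destroy a QTree. */
    static gint compare_ints(gconstpointer a, gconstpointer b)
    {
        return GPOINTER_TO_INT(a) - GPOINTER_TO_INT(b);
    }

    static void qtree_demo(void)
    {
        QTree *tree = q_tree_new(compare_ints);

        q_tree_insert(tree, GINT_TO_POINTER(42), GINT_TO_POINTER(1));
        g_assert(q_tree_lookup(tree, GINT_TO_POINTER(42)) == GINT_TO_POINTER(1));
        g_assert(q_tree_nnodes(tree) == 1);

        q_tree_remove(tree, GINT_TO_POINTER(42));
        q_tree_destroy(tree);
    }
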
diff --git a/tests/unit/test-div128.c b/tests/unit/test-div128.c
75
diff --git a/configure b/configure
76
index XXXXXXX..XXXXXXX 100755
77
--- a/configure
78
+++ b/configure
79
@@ -XXX,XX +XXX,XX @@ safe_stack=""
80
use_containers="yes"
81
gdb_bin=$(command -v "gdb-multiarch" || command -v "gdb")
82
gdb_arches=""
83
+glib_has_gslice="no"
84
85
if test -e "$source_path/.git"
86
then
87
@@ -XXX,XX +XXX,XX @@ for i in $glib_modules; do
88
fi
89
done
90
91
+# Check whether glib has gslice, which we have to avoid for correctness.
92
+# TODO: remove this check and the corresponding workaround (qtree) when
93
+# the minimum supported glib is >= $glib_dropped_gslice_version.
94
+glib_dropped_gslice_version=2.75.3
95
+for i in $glib_modules; do
96
+ if ! $pkg_config --atleast-version=$glib_dropped_gslice_version $i; then
97
+ glib_has_gslice="yes"
98
+    break
99
+ fi
100
+done
101
+
102
glib_bindir="$($pkg_config --variable=bindir glib-2.0)"
103
if test -z "$glib_bindir" ; then
104
    glib_bindir="$($pkg_config --variable=prefix glib-2.0)"/bin
105
@@ -XXX,XX +XXX,XX @@ echo "GLIB_CFLAGS=$glib_cflags" >> $config_host_mak
106
echo "GLIB_LIBS=$glib_libs" >> $config_host_mak
107
echo "GLIB_BINDIR=$glib_bindir" >> $config_host_mak
108
echo "GLIB_VERSION=$($pkg_config --modversion glib-2.0)" >> $config_host_mak
109
+if test "$glib_has_gslice" = "yes" ; then
110
+ echo "HAVE_GLIB_WITH_SLICE_ALLOCATOR=y" >> $config_host_mak
111
+fi
112
echo "QEMU_LDFLAGS=$QEMU_LDFLAGS" >> $config_host_mak
113
echo "EXESUF=$EXESUF" >> $config_host_mak
114
115
diff --git a/meson.build b/meson.build
116
index XXXXXXX..XXXXXXX 100644
117
--- a/meson.build
118
+++ b/meson.build
119
@@ -XXX,XX +XXX,XX @@ glib = declare_dependency(compile_args: config_host['GLIB_CFLAGS'].split(),
120
})
121
# override glib dep with the configure results (for subprojects)
122
meson.override_dependency('glib-2.0', glib)
123
+# pass down whether Glib has the slice allocator
124
+if config_host.has_key('HAVE_GLIB_WITH_SLICE_ALLOCATOR')
125
+ config_host_data.set('HAVE_GLIB_WITH_SLICE_ALLOCATOR', true)
126
+endif
127
128
gio = not_found
129
gdbus_codegen = not_found
130
diff --git a/include/qemu/qtree.h b/include/qemu/qtree.h
14
new file mode 100644
131
new file mode 100644
15
index XXXXXXX..XXXXXXX
132
index XXXXXXX..XXXXXXX
16
--- /dev/null
133
--- /dev/null
17
+++ b/tests/unit/test-div128.c
134
+++ b/include/qemu/qtree.h
18
@@ -XXX,XX +XXX,XX @@
135
@@ -XXX,XX +XXX,XX @@
19
+/*
136
+/*
20
+ * Test 128-bit division functions
137
+ * GLIB - Library of useful routines for C programming
21
+ *
138
+ * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
22
+ * Copyright (c) 2021 Instituto de Pesquisas Eldorado (eldorado.org.br)
139
+ *
140
+ * SPDX-License-Identifier: LGPL-2.1-or-later
23
+ *
141
+ *
24
+ * This library is free software; you can redistribute it and/or
142
+ * This library is free software; you can redistribute it and/or
25
+ * modify it under the terms of the GNU Lesser General Public
143
+ * modify it under the terms of the GNU Lesser General Public
26
+ * License as published by the Free Software Foundation; either
144
+ * License as published by the Free Software Foundation; either
27
+ * version 2.1 of the License, or (at your option) any later version.
145
+ * version 2.1 of the License, or (at your option) any later version.
28
+ *
146
+ *
29
+ * This library is distributed in the hope that it will be useful,
147
+ * This library is distributed in the hope that it will be useful,
30
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
148
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
149
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
150
+ * Lesser General Public License for more details.
151
+ *
152
+ * You should have received a copy of the GNU Lesser General Public
153
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
154
+ */
155
+
156
+/*
157
+ * Modified by the GLib Team and others 1997-2000. See the AUTHORS
158
+ * file for a list of people on the GLib Team. See the ChangeLog
159
+ * files for a list of changes. These files are distributed with
160
+ * GLib at ftp://ftp.gtk.org/pub/gtk/.
161
+ */
162
+
163
+/*
164
+ * QTree is a partial import of Glib's GTree. The parts excluded correspond
165
+ * to API calls either deprecated (e.g. g_tree_traverse) or recently added
166
+ * (e.g. g_tree_search_node, added in 2.68); neither have callers in QEMU.
167
+ *
168
+ * The reason for this import is to allow us to control the memory allocator
169
+ * used by the tree implementation. Until Glib 2.75.3, GTree uses Glib's
170
+ * slice allocator, which causes problems when forking in user-mode;
171
+ * see https://gitlab.com/qemu-project/qemu/-/issues/285 and glib's
172
+ * "45b5a6c1e gslice: Remove slice allocator and use malloc() instead".
173
+ *
174
+ * TODO: remove QTree when QEMU's minimum Glib version is >= 2.75.3.
175
+ */
176
+
177
+#ifndef QEMU_QTREE_H
178
+#define QEMU_QTREE_H
179
+
180
+#include "qemu/osdep.h"
181
+
182
+#ifdef HAVE_GLIB_WITH_SLICE_ALLOCATOR
183
+
184
+typedef struct _QTree QTree;
185
+
186
+typedef struct _QTreeNode QTreeNode;
187
+
188
+typedef gboolean (*QTraverseNodeFunc)(QTreeNode *node,
189
+ gpointer user_data);
190
+
191
+/*
192
+ * Balanced binary trees
193
+ */
194
+QTree *q_tree_new(GCompareFunc key_compare_func);
195
+QTree *q_tree_new_with_data(GCompareDataFunc key_compare_func,
196
+ gpointer key_compare_data);
197
+QTree *q_tree_new_full(GCompareDataFunc key_compare_func,
198
+ gpointer key_compare_data,
199
+ GDestroyNotify key_destroy_func,
200
+ GDestroyNotify value_destroy_func);
201
+QTree *q_tree_ref(QTree *tree);
202
+void q_tree_unref(QTree *tree);
203
+void q_tree_destroy(QTree *tree);
204
+void q_tree_insert(QTree *tree,
205
+ gpointer key,
206
+ gpointer value);
207
+void q_tree_replace(QTree *tree,
208
+ gpointer key,
209
+ gpointer value);
210
+gboolean q_tree_remove(QTree *tree,
211
+ gconstpointer key);
212
+gboolean q_tree_steal(QTree *tree,
213
+ gconstpointer key);
214
+gpointer q_tree_lookup(QTree *tree,
215
+ gconstpointer key);
216
+gboolean q_tree_lookup_extended(QTree *tree,
217
+ gconstpointer lookup_key,
218
+ gpointer *orig_key,
219
+ gpointer *value);
220
+void q_tree_foreach(QTree *tree,
221
+ GTraverseFunc func,
222
+ gpointer user_data);
223
+gpointer q_tree_search(QTree *tree,
224
+ GCompareFunc search_func,
225
+ gconstpointer user_data);
226
+gint q_tree_height(QTree *tree);
227
+gint q_tree_nnodes(QTree *tree);
228
+
229
+#else /* !HAVE_GLIB_WITH_SLICE_ALLOCATOR */
230
+
231
+typedef GTree QTree;
232
+typedef GTreeNode QTreeNode;
233
+typedef GTraverseNodeFunc QTraverseNodeFunc;
234
+
235
+static inline QTree *q_tree_new(GCompareFunc key_compare_func)
236
+{
237
+ return g_tree_new(key_compare_func);
238
+}
239
+
240
+static inline QTree *q_tree_new_with_data(GCompareDataFunc key_compare_func,
241
+ gpointer key_compare_data)
242
+{
243
+ return g_tree_new_with_data(key_compare_func, key_compare_data);
244
+}
245
+
246
+static inline QTree *q_tree_new_full(GCompareDataFunc key_compare_func,
247
+ gpointer key_compare_data,
248
+ GDestroyNotify key_destroy_func,
249
+ GDestroyNotify value_destroy_func)
250
+{
251
+ return g_tree_new_full(key_compare_func, key_compare_data,
252
+ key_destroy_func, value_destroy_func);
253
+}
254
+
255
+static inline QTree *q_tree_ref(QTree *tree)
256
+{
257
+ return g_tree_ref(tree);
258
+}
259
+
260
+static inline void q_tree_unref(QTree *tree)
261
+{
262
+ g_tree_unref(tree);
263
+}
264
+
265
+static inline void q_tree_destroy(QTree *tree)
266
+{
267
+ g_tree_destroy(tree);
268
+}
269
+
270
+static inline void q_tree_insert(QTree *tree,
271
+ gpointer key,
272
+ gpointer value)
273
+{
274
+ g_tree_insert(tree, key, value);
275
+}
276
+
277
+static inline void q_tree_replace(QTree *tree,
278
+ gpointer key,
279
+ gpointer value)
280
+{
281
+ g_tree_replace(tree, key, value);
282
+}
283
+
284
+static inline gboolean q_tree_remove(QTree *tree,
285
+ gconstpointer key)
286
+{
287
+ return g_tree_remove(tree, key);
288
+}
289
+
290
+static inline gboolean q_tree_steal(QTree *tree,
291
+ gconstpointer key)
292
+{
293
+ return g_tree_steal(tree, key);
294
+}
295
+
296
+static inline gpointer q_tree_lookup(QTree *tree,
297
+ gconstpointer key)
298
+{
299
+ return g_tree_lookup(tree, key);
300
+}
301
+
302
+static inline gboolean q_tree_lookup_extended(QTree *tree,
303
+ gconstpointer lookup_key,
304
+ gpointer *orig_key,
305
+ gpointer *value)
306
+{
307
+ return g_tree_lookup_extended(tree, lookup_key, orig_key, value);
308
+}
309
+
310
+static inline void q_tree_foreach(QTree *tree,
311
+ GTraverseFunc func,
312
+ gpointer user_data)
313
+{
314
+ return g_tree_foreach(tree, func, user_data);
315
+}
316
+
317
+static inline gpointer q_tree_search(QTree *tree,
318
+ GCompareFunc search_func,
319
+ gconstpointer user_data)
320
+{
321
+ return g_tree_search(tree, search_func, user_data);
322
+}
323
+
324
+static inline gint q_tree_height(QTree *tree)
325
+{
326
+ return g_tree_height(tree);
327
+}
328
+
329
+static inline gint q_tree_nnodes(QTree *tree)
330
+{
331
+ return g_tree_nnodes(tree);
332
+}
333
+
334
+#endif /* HAVE_GLIB_WITH_SLICE_ALLOCATOR */
335
+
336
+#endif /* QEMU_QTREE_H */
337
diff --git a/tests/bench/qtree-bench.c b/tests/bench/qtree-bench.c
338
new file mode 100644
339
index XXXXXXX..XXXXXXX
340
--- /dev/null
341
+++ b/tests/bench/qtree-bench.c
342
@@ -XXX,XX +XXX,XX @@
343
+/* SPDX-License-Identifier: GPL-2.0-or-later */
344
+#include "qemu/osdep.h"
345
+#include "qemu/qtree.h"
346
+#include "qemu/timer.h"
347
+
348
+enum tree_op {
349
+ OP_LOOKUP,
350
+ OP_INSERT,
351
+ OP_REMOVE,
352
+ OP_REMOVE_ALL,
353
+ OP_TRAVERSE,
354
+};
355
+
356
+struct benchmark {
357
+ const char * const name;
358
+ enum tree_op op;
359
+ bool fill_on_init;
360
+};
361
+
362
+enum impl_type {
363
+ IMPL_GTREE,
364
+ IMPL_QTREE,
365
+};
366
+
367
+struct tree_implementation {
368
+ const char * const name;
369
+ enum impl_type type;
370
+};
371
+
372
+static const struct benchmark benchmarks[] = {
373
+ {
374
+ .name = "Lookup",
375
+ .op = OP_LOOKUP,
376
+ .fill_on_init = true,
377
+ },
378
+ {
379
+ .name = "Insert",
380
+ .op = OP_INSERT,
381
+ .fill_on_init = false,
382
+ },
383
+ {
384
+ .name = "Remove",
385
+ .op = OP_REMOVE,
386
+ .fill_on_init = true,
387
+ },
388
+ {
389
+ .name = "RemoveAll",
390
+ .op = OP_REMOVE_ALL,
391
+ .fill_on_init = true,
392
+ },
393
+ {
394
+ .name = "Traverse",
395
+ .op = OP_TRAVERSE,
396
+ .fill_on_init = true,
397
+ },
398
+};
399
+
400
+static const struct tree_implementation impls[] = {
401
+ {
402
+ .name = "GTree",
403
+ .type = IMPL_GTREE,
404
+ },
405
+ {
406
+ .name = "QTree",
407
+ .type = IMPL_QTREE,
408
+ },
409
+};
410
+
411
+static int compare_func(const void *ap, const void *bp)
412
+{
413
+ const size_t *a = ap;
414
+ const size_t *b = bp;
415
+
416
+ return *a - *b;
417
+}
418
+
419
+static void init_empty_tree_and_keys(enum impl_type impl,
420
+ void **ret_tree, size_t **ret_keys,
421
+ size_t n_elems)
422
+{
423
+ size_t *keys = g_malloc_n(n_elems, sizeof(*keys));
424
+ for (size_t i = 0; i < n_elems; i++) {
425
+ keys[i] = i;
426
+ }
427
+
428
+ void *tree;
429
+ switch (impl) {
430
+ case IMPL_GTREE:
431
+ tree = g_tree_new(compare_func);
432
+ break;
433
+ case IMPL_QTREE:
434
+ tree = q_tree_new(compare_func);
435
+ break;
436
+ default:
437
+ g_assert_not_reached();
438
+ }
439
+
440
+ *ret_tree = tree;
441
+ *ret_keys = keys;
442
+}
443
+
444
+static gboolean traverse_func(gpointer key, gpointer value, gpointer data)
445
+{
446
+ return FALSE;
447
+}
448
+
449
+static inline void remove_all(void *tree, enum impl_type impl)
450
+{
451
+ switch (impl) {
452
+ case IMPL_GTREE:
453
+ g_tree_destroy(tree);
454
+ break;
455
+ case IMPL_QTREE:
456
+ q_tree_destroy(tree);
457
+ break;
458
+ default:
459
+ g_assert_not_reached();
460
+ }
461
+}
462
+
463
+static int64_t run_benchmark(const struct benchmark *bench,
464
+ enum impl_type impl,
465
+ size_t n_elems)
466
+{
467
+ void *tree;
468
+ size_t *keys;
469
+
470
+ init_empty_tree_and_keys(impl, &tree, &keys, n_elems);
471
+ if (bench->fill_on_init) {
472
+ for (size_t i = 0; i < n_elems; i++) {
473
+ switch (impl) {
474
+ case IMPL_GTREE:
475
+ g_tree_insert(tree, &keys[i], &keys[i]);
476
+ break;
477
+ case IMPL_QTREE:
478
+ q_tree_insert(tree, &keys[i], &keys[i]);
479
+ break;
480
+ default:
481
+ g_assert_not_reached();
482
+ }
483
+ }
484
+ }
485
+
486
+ int64_t start_ns = get_clock();
487
+ switch (bench->op) {
488
+ case OP_LOOKUP:
489
+ for (size_t i = 0; i < n_elems; i++) {
490
+ void *value;
491
+ switch (impl) {
492
+ case IMPL_GTREE:
493
+ value = g_tree_lookup(tree, &keys[i]);
494
+ break;
495
+ case IMPL_QTREE:
496
+ value = q_tree_lookup(tree, &keys[i]);
497
+ break;
498
+ default:
499
+ g_assert_not_reached();
500
+ }
501
+ (void)value;
502
+ }
503
+ break;
504
+ case OP_INSERT:
505
+ for (size_t i = 0; i < n_elems; i++) {
506
+ switch (impl) {
507
+ case IMPL_GTREE:
508
+ g_tree_insert(tree, &keys[i], &keys[i]);
509
+ break;
510
+ case IMPL_QTREE:
511
+ q_tree_insert(tree, &keys[i], &keys[i]);
512
+ break;
513
+ default:
514
+ g_assert_not_reached();
515
+ }
516
+ }
517
+ break;
518
+ case OP_REMOVE:
519
+ for (size_t i = 0; i < n_elems; i++) {
520
+ switch (impl) {
521
+ case IMPL_GTREE:
522
+ g_tree_remove(tree, &keys[i]);
523
+ break;
524
+ case IMPL_QTREE:
525
+ q_tree_remove(tree, &keys[i]);
526
+ break;
527
+ default:
528
+ g_assert_not_reached();
529
+ }
530
+ }
531
+ break;
532
+ case OP_REMOVE_ALL:
533
+ remove_all(tree, impl);
534
+ break;
535
+ case OP_TRAVERSE:
536
+ switch (impl) {
537
+ case IMPL_GTREE:
538
+ g_tree_foreach(tree, traverse_func, NULL);
539
+ break;
540
+ case IMPL_QTREE:
541
+ q_tree_foreach(tree, traverse_func, NULL);
542
+ break;
543
+ default:
544
+ g_assert_not_reached();
545
+ }
546
+ break;
547
+ default:
548
+ g_assert_not_reached();
549
+ }
550
+ int64_t ns = get_clock() - start_ns;
551
+
552
+ if (bench->op != OP_REMOVE_ALL) {
553
+ remove_all(tree, impl);
554
+ }
555
+ g_free(keys);
556
+
557
+ return ns;
558
+}
559
+
560
+int main(int argc, char *argv[])
561
+{
562
+ size_t sizes[] = {
563
+ 32,
564
+ 1024,
565
+ 1024 * 4,
566
+ 1024 * 128,
567
+ 1024 * 1024,
568
+ };
569
+
570
+ double res[ARRAY_SIZE(benchmarks)][ARRAY_SIZE(impls)][ARRAY_SIZE(sizes)];
571
+ for (int i = 0; i < ARRAY_SIZE(sizes); i++) {
572
+ size_t size = sizes[i];
573
+ for (int j = 0; j < ARRAY_SIZE(impls); j++) {
574
+ const struct tree_implementation *impl = &impls[j];
575
+ for (int k = 0; k < ARRAY_SIZE(benchmarks); k++) {
576
+ const struct benchmark *bench = &benchmarks[k];
577
+
578
+ /* warm-up run */
579
+ run_benchmark(bench, impl->type, size);
580
+
581
+ int64_t total_ns = 0;
582
+ int64_t n_runs = 0;
583
+ while (total_ns < 2e8 || n_runs < 5) {
584
+ total_ns += run_benchmark(bench, impl->type, size);
585
+ n_runs++;
586
+ }
587
+ double ns_per_run = (double)total_ns / n_runs;
588
+
589
+ /* Throughput, in Mops/s */
590
+ res[k][j][i] = size / ns_per_run * 1e3;
591
+ }
592
+ }
593
+ }
594
+
595
+ printf("# Results' breakdown: Tree, Op and #Elements. Units: Mops/s\n");
596
+ printf("%5s %10s ", "Tree", "Op");
597
+ for (int i = 0; i < ARRAY_SIZE(sizes); i++) {
598
+ printf("%7zu ", sizes[i]);
599
+ }
600
+ printf("\n");
601
+ char separator[97];
602
+ for (int i = 0; i < ARRAY_SIZE(separator) - 1; i++) {
603
+ separator[i] = '-';
604
+ }
605
+ separator[ARRAY_SIZE(separator) - 1] = '\0';
606
+ printf("%s\n", separator);
607
+ for (int i = 0; i < ARRAY_SIZE(benchmarks); i++) {
608
+ for (int j = 0; j < ARRAY_SIZE(impls); j++) {
609
+ printf("%5s %10s ", impls[j].name, benchmarks[i].name);
610
+ for (int k = 0; k < ARRAY_SIZE(sizes); k++) {
611
+ printf("%7.2f ", res[i][j][k]);
612
+ if (j == 0) {
613
+ printf(" ");
614
+ } else {
615
+ if (res[i][0][k] != 0) {
616
+ double speedup = res[i][j][k] / res[i][0][k];
617
+ printf("(%4.2fx) ", speedup);
618
+ } else {
619
+ printf("( ) ");
620
+ }
621
+ }
622
+ }
623
+ printf("\n");
624
+ }
625
+ }
626
+ printf("%s\n", separator);
627
+ return 0;
628
+}
629
diff --git a/tests/unit/test-qtree.c b/tests/unit/test-qtree.c
630
new file mode 100644
631
index XXXXXXX..XXXXXXX
632
--- /dev/null
633
+++ b/tests/unit/test-qtree.c
634
@@ -XXX,XX +XXX,XX @@
635
+/*
636
+ * SPDX-License-Identifier: LGPL-2.1-or-later
637
+ *
638
+ * Tests for QTree.
639
+ * Original source: glib
640
+ * https://gitlab.gnome.org/GNOME/glib/-/blob/main/glib/tests/tree.c
641
+ * LGPL license.
642
+ * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
643
+ */
644
+
645
+#include "qemu/osdep.h"
646
+#include "qemu/qtree.h"
647
+
648
+static gint my_compare(gconstpointer a, gconstpointer b)
649
+{
650
+ const char *cha = a;
651
+ const char *chb = b;
652
+
653
+ return *cha - *chb;
654
+}
655
+
656
+static gint my_compare_with_data(gconstpointer a,
657
+ gconstpointer b,
658
+ gpointer user_data)
659
+{
660
+ const char *cha = a;
661
+ const char *chb = b;
662
+
663
+ /* just check that we got the right data */
664
+ g_assert(GPOINTER_TO_INT(user_data) == 123);
665
+
666
+ return *cha - *chb;
667
+}
668
+
669
+static gint my_search(gconstpointer a, gconstpointer b)
670
+{
671
+ return my_compare(b, a);
672
+}
673
+
674
+static gpointer destroyed_key;
675
+static gpointer destroyed_value;
676
+static guint destroyed_key_count;
677
+static guint destroyed_value_count;
678
+
679
+static void my_key_destroy(gpointer key)
680
+{
681
+ destroyed_key = key;
682
+ destroyed_key_count++;
683
+}
684
+
685
+static void my_value_destroy(gpointer value)
686
+{
687
+ destroyed_value = value;
688
+ destroyed_value_count++;
689
+}
690
+
691
+static gint my_traverse(gpointer key, gpointer value, gpointer data)
692
+{
693
+ char *ch = key;
694
+
695
+ g_assert((*ch) > 0);
696
+
697
+ if (*ch == 'd') {
698
+ return TRUE;
699
+ }
700
+
701
+ return FALSE;
702
+}
703
+
704
+char chars[] =
705
+ "0123456789"
706
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
707
+ "abcdefghijklmnopqrstuvwxyz";
708
+
709
+char chars2[] =
710
+ "0123456789"
711
+ "abcdefghijklmnopqrstuvwxyz";
712
+
713
+static gint check_order(gpointer key, gpointer value, gpointer data)
714
+{
715
+ char **p = data;
716
+ char *ch = key;
717
+
718
+ g_assert(**p == *ch);
719
+
720
+ (*p)++;
721
+
722
+ return FALSE;
723
+}
724
+
725
+static void test_tree_search(void)
726
+{
727
+ gint i;
728
+ QTree *tree;
729
+ gboolean removed;
730
+ gchar c;
731
+ gchar *p, *d;
732
+
733
+ tree = q_tree_new_with_data(my_compare_with_data, GINT_TO_POINTER(123));
734
+
735
+ for (i = 0; chars[i]; i++) {
736
+ q_tree_insert(tree, &chars[i], &chars[i]);
737
+ }
738
+
739
+ q_tree_foreach(tree, my_traverse, NULL);
740
+
741
+ g_assert(q_tree_nnodes(tree) == strlen(chars));
742
+ g_assert(q_tree_height(tree) == 6);
743
+
744
+ p = chars;
745
+ q_tree_foreach(tree, check_order, &p);
746
+
747
+ for (i = 0; i < 26; i++) {
748
+ removed = q_tree_remove(tree, &chars[i + 10]);
749
+ g_assert(removed);
750
+ }
751
+
752
+ c = '\0';
753
+ removed = q_tree_remove(tree, &c);
754
+ g_assert(!removed);
755
+
756
+ q_tree_foreach(tree, my_traverse, NULL);
757
+
758
+ g_assert(q_tree_nnodes(tree) == strlen(chars2));
759
+ g_assert(q_tree_height(tree) == 6);
760
+
761
+ p = chars2;
762
+ q_tree_foreach(tree, check_order, &p);
763
+
764
+ for (i = 25; i >= 0; i--) {
765
+ q_tree_insert(tree, &chars[i + 10], &chars[i + 10]);
766
+ }
767
+
768
+ p = chars;
769
+ q_tree_foreach(tree, check_order, &p);
770
+
771
+ c = '0';
772
+ p = q_tree_lookup(tree, &c);
773
+ g_assert(p && *p == c);
774
+ g_assert(q_tree_lookup_extended(tree, &c, (gpointer *)&d, (gpointer *)&p));
775
+ g_assert(c == *d && c == *p);
776
+
777
+ c = 'A';
778
+ p = q_tree_lookup(tree, &c);
779
+ g_assert(p && *p == c);
780
+
781
+ c = 'a';
782
+ p = q_tree_lookup(tree, &c);
783
+ g_assert(p && *p == c);
784
+
785
+ c = 'z';
786
+ p = q_tree_lookup(tree, &c);
787
+ g_assert(p && *p == c);
788
+
789
+ c = '!';
790
+ p = q_tree_lookup(tree, &c);
791
+ g_assert(p == NULL);
792
+
793
+ c = '=';
794
+ p = q_tree_lookup(tree, &c);
795
+ g_assert(p == NULL);
796
+
797
+ c = '|';
798
+ p = q_tree_lookup(tree, &c);
799
+ g_assert(p == NULL);
800
+
801
+ c = '0';
802
+ p = q_tree_search(tree, my_search, &c);
803
+ g_assert(p && *p == c);
804
+
805
+ c = 'A';
806
+ p = q_tree_search(tree, my_search, &c);
807
+ g_assert(p && *p == c);
808
+
809
+ c = 'a';
810
+ p = q_tree_search(tree, my_search, &c);
811
+ g_assert(p && *p == c);
812
+
813
+ c = 'z';
814
+ p = q_tree_search(tree, my_search, &c);
815
+ g_assert(p && *p == c);
816
+
817
+ c = '!';
818
+ p = q_tree_search(tree, my_search, &c);
819
+ g_assert(p == NULL);
820
+
821
+ c = '=';
822
+ p = q_tree_search(tree, my_search, &c);
823
+ g_assert(p == NULL);
824
+
825
+ c = '|';
826
+ p = q_tree_search(tree, my_search, &c);
827
+ g_assert(p == NULL);
828
+
829
+ q_tree_destroy(tree);
830
+}
831
+
832
+static void test_tree_remove(void)
833
+{
834
+ QTree *tree;
835
+ char c, d;
836
+ gint i;
837
+ gboolean removed;
838
+
839
+ tree = q_tree_new_full((GCompareDataFunc)my_compare, NULL,
840
+ my_key_destroy,
841
+ my_value_destroy);
842
+
843
+ for (i = 0; chars[i]; i++) {
844
+ q_tree_insert(tree, &chars[i], &chars[i]);
845
+ }
846
+
847
+ c = '0';
848
+ q_tree_insert(tree, &c, &c);
849
+ g_assert(destroyed_key == &c);
850
+ g_assert(destroyed_value == &chars[0]);
851
+ destroyed_key = NULL;
852
+ destroyed_value = NULL;
853
+
854
+ d = '1';
855
+ q_tree_replace(tree, &d, &d);
856
+ g_assert(destroyed_key == &chars[1]);
857
+ g_assert(destroyed_value == &chars[1]);
858
+ destroyed_key = NULL;
859
+ destroyed_value = NULL;
860
+
861
+ c = '2';
862
+ removed = q_tree_remove(tree, &c);
863
+ g_assert(removed);
864
+ g_assert(destroyed_key == &chars[2]);
865
+ g_assert(destroyed_value == &chars[2]);
866
+ destroyed_key = NULL;
867
+ destroyed_value = NULL;
868
+
869
+ c = '3';
870
+ removed = q_tree_steal(tree, &c);
871
+ g_assert(removed);
872
+ g_assert(destroyed_key == NULL);
873
+ g_assert(destroyed_value == NULL);
874
+
875
+ const gchar *remove = "omkjigfedba";
876
+ for (i = 0; remove[i]; i++) {
877
+ removed = q_tree_remove(tree, &remove[i]);
878
+ g_assert(removed);
879
+ }
880
+
881
+ q_tree_destroy(tree);
882
+}
883
+
884
+static void test_tree_destroy(void)
885
+{
886
+ QTree *tree;
887
+ gint i;
888
+
889
+ tree = q_tree_new(my_compare);
890
+
891
+ for (i = 0; chars[i]; i++) {
892
+ q_tree_insert(tree, &chars[i], &chars[i]);
893
+ }
894
+
895
+ g_assert(q_tree_nnodes(tree) == strlen(chars));
896
+
897
+ g_test_message("nnodes: %d", q_tree_nnodes(tree));
898
+ q_tree_ref(tree);
899
+ q_tree_destroy(tree);
900
+
901
+ g_test_message("nnodes: %d", q_tree_nnodes(tree));
902
+ g_assert(q_tree_nnodes(tree) == 0);
903
+
904
+ q_tree_unref(tree);
905
+}
906
+
907
+static void test_tree_insert(void)
908
+{
909
+ QTree *tree;
910
+ gchar *p;
911
+ gint i;
912
+ gchar *scrambled;
913
+
914
+ tree = q_tree_new(my_compare);
915
+
916
+ for (i = 0; chars[i]; i++) {
917
+ q_tree_insert(tree, &chars[i], &chars[i]);
918
+ }
919
+ p = chars;
920
+ q_tree_foreach(tree, check_order, &p);
921
+
922
+ q_tree_unref(tree);
923
+ tree = q_tree_new(my_compare);
924
+
925
+ for (i = strlen(chars) - 1; i >= 0; i--) {
926
+ q_tree_insert(tree, &chars[i], &chars[i]);
927
+ }
928
+ p = chars;
929
+ q_tree_foreach(tree, check_order, &p);
930
+
931
+ q_tree_unref(tree);
932
+ tree = q_tree_new(my_compare);
933
+
934
+ scrambled = g_strdup(chars);
935
+
936
+ for (i = 0; i < 30; i++) {
937
+ gchar tmp;
938
+ gint a, b;
939
+
940
+ a = g_random_int_range(0, strlen(scrambled));
941
+ b = g_random_int_range(0, strlen(scrambled));
942
+ tmp = scrambled[a];
943
+ scrambled[a] = scrambled[b];
944
+ scrambled[b] = tmp;
945
+ }
946
+
947
+ for (i = 0; scrambled[i]; i++) {
948
+ q_tree_insert(tree, &scrambled[i], &scrambled[i]);
949
+ }
950
+ p = chars;
951
+ q_tree_foreach(tree, check_order, &p);
952
+
953
+ g_free(scrambled);
954
+ q_tree_unref(tree);
955
+}
956
+
957
+int main(int argc, char *argv[])
958
+{
959
+ g_test_init(&argc, &argv, NULL);
960
+
961
+ g_test_add_func("/qtree/search", test_tree_search);
962
+ g_test_add_func("/qtree/remove", test_tree_remove);
963
+ g_test_add_func("/qtree/destroy", test_tree_destroy);
964
+ g_test_add_func("/qtree/insert", test_tree_insert);
965
+
966
+ return g_test_run();
967
+}
diff --git a/util/qtree.c b/util/qtree.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/util/qtree.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * GLIB - Library of useful routines for C programming
+ * Copyright (C) 1995-1997 Peter Mattis, Spencer Kimball and Josh MacDonald
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Modified by the GLib Team and others 1997-2000. See the AUTHORS
+ * file for a list of people on the GLib Team. See the ChangeLog
+ * files for a list of changes. These files are distributed with
+ * GLib at ftp://ftp.gtk.org/pub/gtk/.
+ */
+
+/*
+ * MT safe
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/qtree.h"
+
+/**
+ * SECTION:trees-binary
+ * @title: Balanced Binary Trees
+ * @short_description: a sorted collection of key/value pairs optimized
+ * for searching and traversing in order
+ *
+ * The #QTree structure and its associated functions provide a sorted
+ * collection of key/value pairs optimized for searching and traversing
+ * in order. This means that most of the operations (access, search,
+ * insertion, deletion, ...) on #QTree are O(log(n)) in average and O(n)
+ * in worst case for time complexity. But, note that maintaining a
+ * balanced sorted #QTree of n elements is done in time O(n log(n)).
+ *
+ * To create a new #QTree use q_tree_new().
+ *
+ * To insert a key/value pair into a #QTree use q_tree_insert()
+ * (O(n log(n))).
+ *
+ * To remove a key/value pair use q_tree_remove() (O(n log(n))).
+ *
+ * To look up the value corresponding to a given key, use
+ * q_tree_lookup() and q_tree_lookup_extended().
+ *
+ * To find out the number of nodes in a #QTree, use q_tree_nnodes(). To
+ * get the height of a #QTree, use q_tree_height().
+ *
+ * To traverse a #QTree, calling a function for each node visited in
+ * the traversal, use q_tree_foreach().
+ *
+ * To destroy a #QTree, use q_tree_destroy().
+ **/
+
+#define MAX_GTREE_HEIGHT 40
+
+/**
+ * QTree:
+ *
+ * The QTree struct is an opaque data structure representing a
+ * [balanced binary tree][glib-Balanced-Binary-Trees]. It should be
+ * accessed only by using the following functions.
+ */
+struct _QTree {
+ QTreeNode *root;
+ GCompareDataFunc key_compare;
+ GDestroyNotify key_destroy_func;
+ GDestroyNotify value_destroy_func;
+ gpointer key_compare_data;
+ guint nnodes;
+ gint ref_count;
+};
+
+struct _QTreeNode {
+ gpointer key; /* key for this node */
+ gpointer value; /* value stored at this node */
+ QTreeNode *left; /* left subtree */
+ QTreeNode *right; /* right subtree */
+ gint8 balance; /* height (right) - height (left) */
+ guint8 left_child;
+ guint8 right_child;
+};
+
+
+static QTreeNode *q_tree_node_new(gpointer key,
+ gpointer value);
+static QTreeNode *q_tree_insert_internal(QTree *tree,
+ gpointer key,
+ gpointer value,
+ gboolean replace);
+static gboolean q_tree_remove_internal(QTree *tree,
1077
+ gconstpointer key,
1078
+ gboolean steal);
1079
+static QTreeNode *q_tree_node_balance(QTreeNode *node);
1080
+static QTreeNode *q_tree_find_node(QTree *tree,
1081
+ gconstpointer key);
1082
+static QTreeNode *q_tree_node_search(QTreeNode *node,
1083
+ GCompareFunc search_func,
1084
+ gconstpointer data);
1085
+static QTreeNode *q_tree_node_rotate_left(QTreeNode *node);
1086
+static QTreeNode *q_tree_node_rotate_right(QTreeNode *node);
1087
+#ifdef Q_TREE_DEBUG
1088
+static void q_tree_node_check(QTreeNode *node);
1089
+#endif
1090
+
1091
+static QTreeNode*
1092
+q_tree_node_new(gpointer key,
1093
+ gpointer value)
1094
+{
1095
+ QTreeNode *node = g_new(QTreeNode, 1);
1096
+
1097
+ node->balance = 0;
1098
+ node->left = NULL;
1099
+ node->right = NULL;
1100
+ node->left_child = FALSE;
1101
+ node->right_child = FALSE;
1102
+ node->key = key;
1103
+ node->value = value;
1104
+
1105
+ return node;
1106
+}
1107
+
1108
+/**
1109
+ * q_tree_new:
1110
+ * @key_compare_func: the function used to order the nodes in the #QTree.
1111
+ * It should return values similar to the standard strcmp() function -
1112
+ * 0 if the two arguments are equal, a negative value if the first argument
1113
+ * comes before the second, or a positive value if the first argument comes
1114
+ * after the second.
1115
+ *
1116
+ * Creates a new #QTree.
1117
+ *
1118
+ * Returns: a newly allocated #QTree
1119
+ */
1120
+QTree *
1121
+q_tree_new(GCompareFunc key_compare_func)
1122
+{
1123
+ g_return_val_if_fail(key_compare_func != NULL, NULL);
1124
+
1125
+ return q_tree_new_full((GCompareDataFunc) key_compare_func, NULL,
1126
+ NULL, NULL);
1127
+}
1128
+
1129
+/**
1130
+ * q_tree_new_with_data:
1131
+ * @key_compare_func: qsort()-style comparison function
1132
+ * @key_compare_data: data to pass to comparison function
1133
+ *
1134
+ * Creates a new #QTree with a comparison function that accepts user data.
1135
+ * See q_tree_new() for more details.
1136
+ *
1137
+ * Returns: a newly allocated #QTree
1138
+ */
1139
+QTree *
1140
+q_tree_new_with_data(GCompareDataFunc key_compare_func,
1141
+ gpointer key_compare_data)
1142
+{
1143
+ g_return_val_if_fail(key_compare_func != NULL, NULL);
1144
+
1145
+ return q_tree_new_full(key_compare_func, key_compare_data,
1146
+ NULL, NULL);
1147
+}
1148
+
1149
+/**
1150
+ * q_tree_new_full:
1151
+ * @key_compare_func: qsort()-style comparison function
1152
+ * @key_compare_data: data to pass to comparison function
1153
+ * @key_destroy_func: a function to free the memory allocated for the key
1154
+ * used when removing the entry from the #QTree or %NULL if you don't
1155
+ * want to supply such a function
1156
+ * @value_destroy_func: a function to free the memory allocated for the
1157
+ * value used when removing the entry from the #QTree or %NULL if you
1158
+ * don't want to supply such a function
1159
+ *
1160
+ * Creates a new #QTree like q_tree_new() and allows to specify functions
1161
+ * to free the memory allocated for the key and value that get called when
1162
+ * removing the entry from the #QTree.
1163
+ *
1164
+ * Returns: a newly allocated #QTree
1165
+ */
1166
+QTree *
1167
+q_tree_new_full(GCompareDataFunc key_compare_func,
1168
+ gpointer key_compare_data,
1169
+ GDestroyNotify key_destroy_func,
1170
+ GDestroyNotify value_destroy_func)
1171
+{
1172
+ QTree *tree;
1173
+
1174
+ g_return_val_if_fail(key_compare_func != NULL, NULL);
1175
+
1176
+ tree = g_new(QTree, 1);
1177
+ tree->root = NULL;
1178
+ tree->key_compare = key_compare_func;
1179
+ tree->key_destroy_func = key_destroy_func;
1180
+ tree->value_destroy_func = value_destroy_func;
1181
+ tree->key_compare_data = key_compare_data;
1182
+ tree->nnodes = 0;
1183
+ tree->ref_count = 1;
1184
+
1185
+ return tree;
1186
+}
1187
+
1188
+/**
1189
+ * q_tree_node_first:
1190
+ * @tree: a #QTree
1191
+ *
1192
+ * Returns the first in-order node of the tree, or %NULL
1193
+ * for an empty tree.
1194
+ *
1195
+ * Returns: (nullable) (transfer none): the first node in the tree
1196
+ *
1197
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
1198
+ */
1199
+static QTreeNode *
1200
+q_tree_node_first(QTree *tree)
1201
+{
1202
+ QTreeNode *tmp;
1203
+
1204
+ g_return_val_if_fail(tree != NULL, NULL);
1205
+
1206
+ if (!tree->root) {
1207
+ return NULL;
1208
+ }
1209
+
1210
+ tmp = tree->root;
1211
+
1212
+ while (tmp->left_child) {
1213
+ tmp = tmp->left;
1214
+ }
1215
+
1216
+ return tmp;
1217
+}
1218
+
1219
+/**
1220
+ * q_tree_node_previous
1221
+ * @node: a #QTree node
1222
+ *
1223
+ * Returns the previous in-order node of the tree, or %NULL
1224
+ * if the passed node was already the first one.
1225
+ *
1226
+ * Returns: (nullable) (transfer none): the previous node in the tree
1227
+ *
1228
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
1229
+ */
1230
+static QTreeNode *
1231
+q_tree_node_previous(QTreeNode *node)
1232
+{
1233
+ QTreeNode *tmp;
1234
+
1235
+ g_return_val_if_fail(node != NULL, NULL);
1236
+
1237
+ tmp = node->left;
1238
+
1239
+ if (node->left_child) {
1240
+ while (tmp->right_child) {
1241
+ tmp = tmp->right;
1242
+ }
1243
+ }
1244
+
1245
+ return tmp;
1246
+}
1247
+
1248
+/**
1249
+ * q_tree_node_next
1250
+ * @node: a #QTree node
1251
+ *
1252
+ * Returns the next in-order node of the tree, or %NULL
1253
+ * if the passed node was already the last one.
1254
+ *
1255
+ * Returns: (nullable) (transfer none): the next node in the tree
1256
+ *
1257
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
1258
+ */
1259
+static QTreeNode *
1260
+q_tree_node_next(QTreeNode *node)
1261
+{
1262
+ QTreeNode *tmp;
1263
+
1264
+ g_return_val_if_fail(node != NULL, NULL);
1265
+
1266
+ tmp = node->right;
1267
+
1268
+ if (node->right_child) {
1269
+ while (tmp->left_child) {
1270
+ tmp = tmp->left;
1271
+ }
1272
+ }
1273
+
1274
+ return tmp;
1275
+}
1276
+
1277
+/**
1278
+ * q_tree_remove_all:
1279
+ * @tree: a #QTree
1280
+ *
1281
+ * Removes all nodes from a #QTree and destroys their keys and values,
1282
+ * then resets the #QTree’s root to %NULL.
1283
+ *
1284
+ * Since: 2.70 in GLib. Internal in Qtree, i.e. not in the public API.
1285
+ */
1286
+static void
1287
+q_tree_remove_all(QTree *tree)
1288
+{
1289
+ QTreeNode *node;
1290
+ QTreeNode *next;
1291
+
1292
+ g_return_if_fail(tree != NULL);
1293
+
1294
+ node = q_tree_node_first(tree);
1295
+
1296
+ while (node) {
1297
+ next = q_tree_node_next(node);
1298
+
1299
+ if (tree->key_destroy_func) {
1300
+ tree->key_destroy_func(node->key);
1301
+ }
1302
+ if (tree->value_destroy_func) {
1303
+ tree->value_destroy_func(node->value);
1304
+ }
1305
+ g_free(node);
1306
+
1307
+#ifdef Q_TREE_DEBUG
1308
+ g_assert(tree->nnodes > 0);
1309
+ tree->nnodes--;
1310
+#endif
1311
+
1312
+ node = next;
1313
+ }
1314
+
1315
+#ifdef Q_TREE_DEBUG
1316
+ g_assert(tree->nnodes == 0);
1317
+#endif
1318
+
1319
+ tree->root = NULL;
1320
+#ifndef Q_TREE_DEBUG
1321
+ tree->nnodes = 0;
1322
+#endif
1323
+}
1324
+
1325
+/**
1326
+ * q_tree_ref:
1327
+ * @tree: a #QTree
1328
+ *
1329
+ * Increments the reference count of @tree by one.
1330
+ *
1331
+ * It is safe to call this function from any thread.
1332
+ *
1333
+ * Returns: the passed in #QTree
1334
+ *
1335
+ * Since: 2.22
1336
+ */
1337
+QTree *
1338
+q_tree_ref(QTree *tree)
1339
+{
1340
+ g_return_val_if_fail(tree != NULL, NULL);
1341
+
1342
+ g_atomic_int_inc(&tree->ref_count);
1343
+
1344
+ return tree;
1345
+}
1346
+
1347
+/**
1348
+ * q_tree_unref:
1349
+ * @tree: a #QTree
1350
+ *
1351
+ * Decrements the reference count of @tree by one.
1352
+ * If the reference count drops to 0, all keys and values will
1353
+ * be destroyed (if destroy functions were specified) and all
1354
+ * memory allocated by @tree will be released.
1355
+ *
1356
+ * It is safe to call this function from any thread.
1357
+ *
1358
+ * Since: 2.22
1359
+ */
1360
+void
1361
+q_tree_unref(QTree *tree)
1362
+{
1363
+ g_return_if_fail(tree != NULL);
1364
+
1365
+ if (g_atomic_int_dec_and_test(&tree->ref_count)) {
1366
+ q_tree_remove_all(tree);
1367
+ g_free(tree);
1368
+ }
1369
+}
1370
+
1371
+/**
1372
+ * q_tree_destroy:
1373
+ * @tree: a #QTree
1374
+ *
1375
+ * Removes all keys and values from the #QTree and decreases its
1376
+ * reference count by one. If keys and/or values are dynamically
1377
+ * allocated, you should either free them first or create the #QTree
1378
+ * using q_tree_new_full(). In the latter case the destroy functions
1379
+ * you supplied will be called on all keys and values before destroying
1380
+ * the #QTree.
1381
+ */
1382
+void
1383
+q_tree_destroy(QTree *tree)
1384
+{
1385
+ g_return_if_fail(tree != NULL);
1386
+
1387
+ q_tree_remove_all(tree);
1388
+ q_tree_unref(tree);
1389
+}
1390
+
1391
+/**
1392
+ * q_tree_insert_node:
1393
+ * @tree: a #QTree
1394
+ * @key: the key to insert
1395
+ * @value: the value corresponding to the key
1396
+ *
1397
+ * Inserts a key/value pair into a #QTree.
1398
+ *
1399
+ * If the given key already exists in the #QTree its corresponding value
1400
+ * is set to the new value. If you supplied a @value_destroy_func when
1401
+ * creating the #QTree, the old value is freed using that function. If
1402
+ * you supplied a @key_destroy_func when creating the #QTree, the passed
1403
+ * key is freed using that function.
1404
+ *
1405
+ * The tree is automatically 'balanced' as new key/value pairs are added,
1406
+ * so that the distance from the root to every leaf is as small as possible.
1407
+ * The cost of maintaining a balanced tree while inserting new key/value
1408
+ * result in a O(n log(n)) operation where most of the other operations
1409
+ * are O(log(n)).
1410
+ *
1411
+ * Returns: (transfer none): the inserted (or set) node.
1412
+ *
1413
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
1414
+ */
1415
+static QTreeNode *
1416
+q_tree_insert_node(QTree *tree,
1417
+ gpointer key,
1418
+ gpointer value)
1419
+{
1420
+ QTreeNode *node;
1421
+
1422
+ g_return_val_if_fail(tree != NULL, NULL);
1423
+
1424
+ node = q_tree_insert_internal(tree, key, value, FALSE);
1425
+
1426
+#ifdef Q_TREE_DEBUG
1427
+ q_tree_node_check(tree->root);
1428
+#endif
1429
+
1430
+ return node;
1431
+}
1432
+
1433
+/**
1434
+ * q_tree_insert:
1435
+ * @tree: a #QTree
1436
+ * @key: the key to insert
1437
+ * @value: the value corresponding to the key
1438
+ *
1439
+ * Inserts a key/value pair into a #QTree.
1440
+ *
1441
+ * Inserts a new key and value into a #QTree as q_tree_insert_node() does,
1442
+ * only this function does not return the inserted or set node.
1443
+ */
1444
+void
1445
+q_tree_insert(QTree *tree,
1446
+ gpointer key,
1447
+ gpointer value)
1448
+{
1449
+ q_tree_insert_node(tree, key, value);
1450
+}
1451
+
1452
+/**
1453
+ * q_tree_replace_node:
1454
+ * @tree: a #QTree
1455
+ * @key: the key to insert
1456
+ * @value: the value corresponding to the key
1457
+ *
1458
+ * Inserts a new key and value into a #QTree similar to q_tree_insert_node().
1459
+ * The difference is that if the key already exists in the #QTree, it gets
1460
+ * replaced by the new key. If you supplied a @value_destroy_func when
1461
+ * creating the #QTree, the old value is freed using that function. If you
1462
+ * supplied a @key_destroy_func when creating the #QTree, the old key is
1463
+ * freed using that function.
1464
+ *
1465
+ * The tree is automatically 'balanced' as new key/value pairs are added,
1466
+ * so that the distance from the root to every leaf is as small as possible.
1467
+ *
1468
+ * Returns: (transfer none): the inserted (or set) node.
1469
+ *
1470
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
1471
+ */
1472
+static QTreeNode *
1473
+q_tree_replace_node(QTree *tree,
1474
+ gpointer key,
1475
+ gpointer value)
1476
+{
1477
+ QTreeNode *node;
1478
+
1479
+ g_return_val_if_fail(tree != NULL, NULL);
1480
+
1481
+ node = q_tree_insert_internal(tree, key, value, TRUE);
1482
+
1483
+#ifdef Q_TREE_DEBUG
1484
+ q_tree_node_check(tree->root);
1485
+#endif
1486
+
1487
+ return node;
1488
+}
1489
+
1490
+/**
1491
+ * q_tree_replace:
1492
+ * @tree: a #QTree
1493
+ * @key: the key to insert
1494
+ * @value: the value corresponding to the key
1495
+ *
1496
+ * Inserts a new key and value into a #QTree as q_tree_replace_node() does,
1497
+ * only this function does not return the inserted or set node.
1498
+ */
1499
+void
1500
+q_tree_replace(QTree *tree,
1501
+ gpointer key,
1502
+ gpointer value)
1503
+{
1504
+ q_tree_replace_node(tree, key, value);
1505
+}
1506
+
1507
+/* internal insert routine */
1508
+static QTreeNode *
1509
+q_tree_insert_internal(QTree *tree,
1510
+ gpointer key,
1511
+ gpointer value,
1512
+ gboolean replace)
1513
+{
1514
+ QTreeNode *node, *retnode;
1515
+ QTreeNode *path[MAX_GTREE_HEIGHT];
1516
+ int idx;
1517
+
1518
+ g_return_val_if_fail(tree != NULL, NULL);
1519
+
1520
+ if (!tree->root) {
1521
+ tree->root = q_tree_node_new(key, value);
1522
+ tree->nnodes++;
1523
+ return tree->root;
1524
+ }
1525
+
1526
+ idx = 0;
1527
+ path[idx++] = NULL;
1528
+ node = tree->root;
1529
+
1530
+ while (1) {
1531
+ int cmp = tree->key_compare(key, node->key, tree->key_compare_data);
1532
+
1533
+ if (cmp == 0) {
1534
+ if (tree->value_destroy_func) {
1535
+ tree->value_destroy_func(node->value);
1536
+ }
1537
+
1538
+ node->value = value;
1539
+
1540
+ if (replace) {
1541
+ if (tree->key_destroy_func) {
1542
+ tree->key_destroy_func(node->key);
1543
+ }
1544
+
1545
+ node->key = key;
1546
+ } else {
1547
+ /* free the passed key */
1548
+ if (tree->key_destroy_func) {
1549
+ tree->key_destroy_func(key);
1550
+ }
1551
+ }
1552
+
1553
+ return node;
1554
+ } else if (cmp < 0) {
1555
+ if (node->left_child) {
1556
+ path[idx++] = node;
1557
+ node = node->left;
1558
+ } else {
1559
+ QTreeNode *child = q_tree_node_new(key, value);
1560
+
1561
+ child->left = node->left;
1562
+ child->right = node;
1563
+ node->left = child;
1564
+ node->left_child = TRUE;
1565
+ node->balance -= 1;
1566
+
1567
+ tree->nnodes++;
1568
+
1569
+ retnode = child;
1570
+ break;
1571
+ }
1572
+ } else {
1573
+ if (node->right_child) {
1574
+ path[idx++] = node;
1575
+ node = node->right;
1576
+ } else {
1577
+ QTreeNode *child = q_tree_node_new(key, value);
1578
+
1579
+ child->right = node->right;
1580
+ child->left = node;
1581
+ node->right = child;
1582
+ node->right_child = TRUE;
1583
+ node->balance += 1;
1584
+
1585
+ tree->nnodes++;
1586
+
1587
+ retnode = child;
1588
+ break;
1589
+ }
1590
+ }
1591
+ }
1592
+
1593
+ /*
1594
+ * Restore balance. This is the goodness of a non-recursive
1595
+ * implementation, when we are done with balancing we 'break'
1596
+ * the loop and we are done.
+ */
+ while (1) {
+ QTreeNode *bparent = path[--idx];
+ gboolean left_node = (bparent && node == bparent->left);
+ g_assert(!bparent || bparent->left == node || bparent->right == node);
+
+ if (node->balance < -1 || node->balance > 1) {
+ node = q_tree_node_balance(node);
+ if (bparent == NULL) {
+ tree->root = node;
+ } else if (left_node) {
+ bparent->left = node;
+ } else {
+ bparent->right = node;
+ }
+ }
+
+ if (node->balance == 0 || bparent == NULL) {
+ break;
+ }
+
+ if (left_node) {
+ bparent->balance -= 1;
+ } else {
+ bparent->balance += 1;
+ }
+
+ node = bparent;
+ }
+
+ return retnode;
+}
+
+/**
+ * q_tree_remove:
+ * @tree: a #QTree
+ * @key: the key to remove
+ *
+ * Removes a key/value pair from a #QTree.
+ *
+ * If the #QTree was created using q_tree_new_full(), the key and value
+ * are freed using the supplied destroy functions, otherwise you have to
+ * make sure that any dynamically allocated values are freed yourself.
+ * If the key does not exist in the #QTree, the function does nothing.
+ *
+ * The cost of maintaining a balanced tree while removing a key/value
+ * result in a O(n log(n)) operation where most of the other operations
+ * are O(log(n)).
+ *
+ * Returns: %TRUE if the key was found (prior to 2.8, this function
+ * returned nothing)
+ */
+gboolean
+q_tree_remove(QTree *tree,
+ gconstpointer key)
+{
+ gboolean removed;
+
+ g_return_val_if_fail(tree != NULL, FALSE);
+
+ removed = q_tree_remove_internal(tree, key, FALSE);
+
+#ifdef Q_TREE_DEBUG
+ q_tree_node_check(tree->root);
+#endif
+
+ return removed;
+}
+
+/**
+ * q_tree_steal:
+ * @tree: a #QTree
+ * @key: the key to remove
+ *
+ * Removes a key and its associated value from a #QTree without calling
+ * the key and value destroy functions.
+ *
+ * If the key does not exist in the #QTree, the function does nothing.
+ *
+ * Returns: %TRUE if the key was found (prior to 2.8, this function
+ * returned nothing)
+ */
+gboolean
+q_tree_steal(QTree *tree,
+ gconstpointer key)
+{
+ gboolean removed;
+
+ g_return_val_if_fail(tree != NULL, FALSE);
+
+ removed = q_tree_remove_internal(tree, key, TRUE);
+
+#ifdef Q_TREE_DEBUG
+ q_tree_node_check(tree->root);
+#endif
+
+ return removed;
+}
+
+/* internal remove routine */
+static gboolean
+q_tree_remove_internal(QTree *tree,
+ gconstpointer key,
+ gboolean steal)
+{
+ QTreeNode *node, *parent, *balance;
+ QTreeNode *path[MAX_GTREE_HEIGHT];
1704
+ int idx;
1705
+ gboolean left_node;
1706
+
1707
+ g_return_val_if_fail(tree != NULL, FALSE);
1708
+
1709
+ if (!tree->root) {
1710
+ return FALSE;
1711
+ }
1712
+
1713
+ idx = 0;
1714
+ path[idx++] = NULL;
1715
+ node = tree->root;
1716
+
1717
+ while (1) {
1718
+ int cmp = tree->key_compare(key, node->key, tree->key_compare_data);
1719
+
1720
+ if (cmp == 0) {
1721
+ break;
1722
+ } else if (cmp < 0) {
1723
+ if (!node->left_child) {
1724
+ return FALSE;
1725
+ }
1726
+
1727
+ path[idx++] = node;
1728
+ node = node->left;
1729
+ } else {
1730
+ if (!node->right_child) {
1731
+ return FALSE;
1732
+ }
1733
+
1734
+ path[idx++] = node;
1735
+ node = node->right;
1736
+ }
1737
+ }
1738
+
1739
+ /*
1740
+ * The following code is almost equal to q_tree_remove_node,
1741
+ * except that we do not have to call q_tree_node_parent.
1742
+ */
1743
+ balance = parent = path[--idx];
1744
+ g_assert(!parent || parent->left == node || parent->right == node);
1745
+ left_node = (parent && node == parent->left);
1746
+
1747
+ if (!node->left_child) {
1748
+ if (!node->right_child) {
1749
+ if (!parent) {
1750
+ tree->root = NULL;
1751
+ } else if (left_node) {
1752
+ parent->left_child = FALSE;
1753
+ parent->left = node->left;
1754
+ parent->balance += 1;
1755
+ } else {
1756
+ parent->right_child = FALSE;
1757
+ parent->right = node->right;
1758
+ parent->balance -= 1;
1759
+ }
1760
+ } else {
1761
+ /* node has a right child */
1762
+ QTreeNode *tmp = q_tree_node_next(node);
1763
+ tmp->left = node->left;
1764
+
1765
+ if (!parent) {
1766
+ tree->root = node->right;
1767
+ } else if (left_node) {
1768
+ parent->left = node->right;
1769
+ parent->balance += 1;
1770
+ } else {
1771
+ parent->right = node->right;
1772
+ parent->balance -= 1;
1773
+ }
1774
+ }
1775
+ } else {
1776
+ /* node has a left child */
1777
+ if (!node->right_child) {
1778
+ QTreeNode *tmp = q_tree_node_previous(node);
1779
+ tmp->right = node->right;
1780
+
1781
+ if (parent == NULL) {
1782
+ tree->root = node->left;
1783
+ } else if (left_node) {
1784
+ parent->left = node->left;
1785
+ parent->balance += 1;
1786
+ } else {
1787
+ parent->right = node->left;
1788
+ parent->balance -= 1;
1789
+ }
1790
+ } else {
1791
+ /* node has a both children (pant, pant!) */
1792
+ QTreeNode *prev = node->left;
1793
+ QTreeNode *next = node->right;
1794
+ QTreeNode *nextp = node;
1795
+ int old_idx = idx + 1;
1796
+ idx++;
1797
+
1798
+ /* path[idx] == parent */
1799
+ /* find the immediately next node (and its parent) */
1800
+ while (next->left_child) {
1801
+ path[++idx] = nextp = next;
1802
+ next = next->left;
1803
+ }
1804
+
1805
+ path[old_idx] = next;
1806
+ balance = path[idx];
1807
+
1808
+ /* remove 'next' from the tree */
1809
+ if (nextp != node) {
1810
+ if (next->right_child) {
1811
+ nextp->left = next->right;
1812
+ } else {
1813
+ nextp->left_child = FALSE;
1814
+ }
1815
+ nextp->balance += 1;
1816
+
1817
+ next->right_child = TRUE;
1818
+ next->right = node->right;
1819
+ } else {
1820
+ node->balance -= 1;
1821
+ }
1822
+
1823
+ /* set the prev to point to the right place */
1824
+ while (prev->right_child) {
1825
+ prev = prev->right;
1826
+ }
1827
+ prev->right = next;
1828
+
1829
+ /* prepare 'next' to replace 'node' */
1830
+ next->left_child = TRUE;
1831
+ next->left = node->left;
1832
+ next->balance = node->balance;
1833
+
1834
+ if (!parent) {
1835
+ tree->root = next;
1836
+ } else if (left_node) {
1837
+ parent->left = next;
1838
+ } else {
1839
+ parent->right = next;
1840
+ }
1841
+ }
1842
+ }
1843
+
1844
+ /* restore balance */
1845
+ if (balance) {
1846
+ while (1) {
1847
+ QTreeNode *bparent = path[--idx];
1848
+ g_assert(!bparent ||
1849
+ bparent->left == balance ||
1850
+ bparent->right == balance);
1851
+ left_node = (bparent && balance == bparent->left);
1852
+
1853
+ if (balance->balance < -1 || balance->balance > 1) {
1854
+ balance = q_tree_node_balance(balance);
1855
+ if (!bparent) {
1856
+ tree->root = balance;
1857
+ } else if (left_node) {
1858
+ bparent->left = balance;
1859
+ } else {
1860
+ bparent->right = balance;
1861
+ }
1862
+ }
1863
+
1864
+ if (balance->balance != 0 || !bparent) {
1865
+ break;
1866
+ }
1867
+
1868
+ if (left_node) {
1869
+ bparent->balance += 1;
1870
+ } else {
1871
+ bparent->balance -= 1;
1872
+ }
1873
+
1874
+ balance = bparent;
1875
+ }
1876
+ }
1877
+
1878
+ if (!steal) {
1879
+ if (tree->key_destroy_func) {
1880
+ tree->key_destroy_func(node->key);
1881
+ }
1882
+ if (tree->value_destroy_func) {
1883
+ tree->value_destroy_func(node->value);
1884
+ }
1885
+ }
1886
+
1887
+ g_free(node);
1888
+
1889
+ tree->nnodes--;
1890
+
1891
+ return TRUE;
1892
+}
1893
+
1894
+/**
1895
+ * q_tree_lookup_node:
1896
+ * @tree: a #QTree
1897
+ * @key: the key to look up
1898
+ *
1899
+ * Gets the tree node corresponding to the given key. Since a #QTree is
1900
+ * automatically balanced as key/value pairs are added, key lookup
1901
+ * is O(log n) (where n is the number of key/value pairs in the tree).
1902
+ *
1903
+ * Returns: (nullable) (transfer none): the tree node corresponding to
1904
+ * the key, or %NULL if the key was not found
1905
+ *
1906
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
1907
+ */
1908
+static QTreeNode *
1909
+q_tree_lookup_node(QTree *tree,
1910
+ gconstpointer key)
1911
+{
1912
+ g_return_val_if_fail(tree != NULL, NULL);
1913
+
1914
+ return q_tree_find_node(tree, key);
1915
+}
1916
+
1917
+/**
1918
+ * q_tree_lookup:
1919
+ * @tree: a #QTree
1920
+ * @key: the key to look up
1921
+ *
1922
+ * Gets the value corresponding to the given key. Since a #QTree is
1923
+ * automatically balanced as key/value pairs are added, key lookup
1924
+ * is O(log n) (where n is the number of key/value pairs in the tree).
1925
+ *
1926
+ * Returns: the value corresponding to the key, or %NULL
1927
+ * if the key was not found
1928
+ */
1929
+gpointer
1930
+q_tree_lookup(QTree *tree,
1931
+ gconstpointer key)
1932
+{
1933
+ QTreeNode *node;
1934
+
1935
+ node = q_tree_lookup_node(tree, key);
1936
+
1937
+ return node ? node->value : NULL;
1938
+}
1939
+
1940
+/**
1941
+ * q_tree_lookup_extended:
1942
+ * @tree: a #QTree
1943
+ * @lookup_key: the key to look up
1944
+ * @orig_key: (out) (optional) (nullable): returns the original key
1945
+ * @value: (out) (optional) (nullable): returns the value associated with
1946
+ * the key
1947
+ *
1948
+ * Looks up a key in the #QTree, returning the original key and the
1949
+ * associated value. This is useful if you need to free the memory
1950
+ * allocated for the original key, for example before calling
1951
+ * q_tree_remove().
1952
+ *
1953
+ * Returns: %TRUE if the key was found in the #QTree
1954
+ */
1955
+gboolean
1956
+q_tree_lookup_extended(QTree *tree,
1957
+ gconstpointer lookup_key,
1958
+ gpointer *orig_key,
1959
+ gpointer *value)
1960
+{
1961
+ QTreeNode *node;
1962
+
1963
+ g_return_val_if_fail(tree != NULL, FALSE);
1964
+
1965
+ node = q_tree_find_node(tree, lookup_key);
1966
+
1967
+ if (node) {
1968
+ if (orig_key) {
1969
+ *orig_key = node->key;
1970
+ }
1971
+ if (value) {
1972
+ *value = node->value;
1973
+ }
1974
+ return TRUE;
1975
+ } else {
1976
+ return FALSE;
1977
+ }
1978
+}
1979
+
1980
+/**
1981
+ * q_tree_foreach:
1982
+ * @tree: a #QTree
1983
+ * @func: the function to call for each node visited.
1984
+ * If this function returns %TRUE, the traversal is stopped.
1985
+ * @user_data: user data to pass to the function
1986
+ *
1987
+ * Calls the given function for each of the key/value pairs in the #QTree.
1988
+ * The function is passed the key and value of each pair, and the given
1989
+ * @data parameter. The tree is traversed in sorted order.
1990
+ *
1991
+ * The tree may not be modified while iterating over it (you can't
1992
+ * add/remove items). To remove all items matching a predicate, you need
1993
+ * to add each item to a list in your #GTraverseFunc as you walk over
1994
+ * the tree, then walk the list and remove each item.
1995
+ */
1996
+void
1997
+q_tree_foreach(QTree *tree,
1998
+ GTraverseFunc func,
1999
+ gpointer user_data)
2000
+{
2001
+ QTreeNode *node;
2002
+
2003
+ g_return_if_fail(tree != NULL);
2004
+
2005
+ if (!tree->root) {
2006
+ return;
2007
+ }
2008
+
2009
+ node = q_tree_node_first(tree);
2010
+
2011
+ while (node) {
2012
+ if ((*func)(node->key, node->value, user_data)) {
2013
+ break;
2014
+ }
2015
+
2016
+ node = q_tree_node_next(node);
2017
+ }
2018
+}
2019
+
2020
+/**
2021
+ * q_tree_search_node:
2022
+ * @tree: a #QTree
2023
+ * @search_func: a function used to search the #QTree
2024
+ * @user_data: the data passed as the second argument to @search_func
2025
+ *
2026
+ * Searches a #QTree using @search_func.
2027
+ *
2028
+ * The @search_func is called with a pointer to the key of a key/value
2029
+ * pair in the tree, and the passed in @user_data. If @search_func returns
2030
+ * 0 for a key/value pair, then the corresponding node is returned as
2031
+ * the result of q_tree_search(). If @search_func returns -1, searching
2032
+ * will proceed among the key/value pairs that have a smaller key; if
2033
+ * @search_func returns 1, searching will proceed among the key/value
2034
+ * pairs that have a larger key.
2035
+ *
2036
+ * Returns: (nullable) (transfer none): the node corresponding to the
2037
+ * found key, or %NULL if the key was not found
2038
+ *
2039
+ * Since: 2.68 in GLib. Internal in Qtree, i.e. not in the public API.
2040
+ */
2041
+static QTreeNode *
2042
+q_tree_search_node(QTree *tree,
2043
+ GCompareFunc search_func,
2044
+ gconstpointer user_data)
2045
+{
2046
+ g_return_val_if_fail(tree != NULL, NULL);
2047
+
2048
+ if (!tree->root) {
2049
+ return NULL;
2050
+ }
2051
+
2052
+ return q_tree_node_search(tree->root, search_func, user_data);
2053
+}
2054
+
2055
+/**
2056
+ * q_tree_search:
2057
+ * @tree: a #QTree
2058
+ * @search_func: a function used to search the #QTree
2059
+ * @user_data: the data passed as the second argument to @search_func
2060
+ *
2061
+ * Searches a #QTree using @search_func.
2062
+ *
2063
+ * The @search_func is called with a pointer to the key of a key/value
2064
+ * pair in the tree, and the passed in @user_data. If @search_func returns
2065
+ * 0 for a key/value pair, then the corresponding value is returned as
2066
+ * the result of q_tree_search(). If @search_func returns -1, searching
2067
+ * will proceed among the key/value pairs that have a smaller key; if
2068
+ * @search_func returns 1, searching will proceed among the key/value
2069
+ * pairs that have a larger key.
2070
+ *
2071
+ * Returns: the value corresponding to the found key, or %NULL
2072
+ * if the key was not found
2073
+ */
2074
+gpointer
2075
+q_tree_search(QTree *tree,
2076
+ GCompareFunc search_func,
2077
+ gconstpointer user_data)
2078
+{
2079
+ QTreeNode *node;
2080
+
2081
+ node = q_tree_search_node(tree, search_func, user_data);
2082
+
2083
+ return node ? node->value : NULL;
2084
+}
2085
+
2086
+/**
2087
+ * q_tree_height:
2088
+ * @tree: a #QTree
2089
+ *
2090
+ * Gets the height of a #QTree.
2091
+ *
2092
+ * If the #QTree contains no nodes, the height is 0.
2093
+ * If the #QTree contains only one root node the height is 1.
2094
+ * If the root node has children the height is 2, etc.
2095
+ *
2096
+ * Returns: the height of @tree
2097
+ */
2098
+gint
2099
+q_tree_height(QTree *tree)
2100
+{
2101
+ QTreeNode *node;
2102
+ gint height;
2103
+
2104
+ g_return_val_if_fail(tree != NULL, 0);
2105
+
2106
+ if (!tree->root) {
2107
+ return 0;
2108
+ }
2109
+
2110
+ height = 0;
2111
+ node = tree->root;
2112
+
2113
+ while (1) {
2114
+ height += 1 + MAX(node->balance, 0);
2115
+
2116
+ if (!node->left_child) {
2117
+ return height;
2118
+ }
2119
+
2120
+ node = node->left;
2121
+ }
2122
+}
2123
+
2124
+/**
2125
+ * q_tree_nnodes:
2126
+ * @tree: a #QTree
2127
+ *
2128
+ * Gets the number of nodes in a #QTree.
2129
+ *
2130
+ * Returns: the number of nodes in @tree
2131
+ */
2132
+gint
2133
+q_tree_nnodes(QTree *tree)
2134
+{
2135
+ g_return_val_if_fail(tree != NULL, 0);
2136
+
2137
+ return tree->nnodes;
2138
+}
2139
+
2140
+static QTreeNode *
2141
+q_tree_node_balance(QTreeNode *node)
2142
+{
2143
+ if (node->balance < -1) {
2144
+ if (node->left->balance > 0) {
2145
+ node->left = q_tree_node_rotate_left(node->left);
2146
+ }
2147
+ node = q_tree_node_rotate_right(node);
2148
+ } else if (node->balance > 1) {
2149
+ if (node->right->balance < 0) {
2150
+ node->right = q_tree_node_rotate_right(node->right);
2151
+ }
2152
+ node = q_tree_node_rotate_left(node);
2153
+ }
2154
+
2155
+ return node;
2156
+}
2157
+
2158
+static QTreeNode *
2159
+q_tree_find_node(QTree *tree,
2160
+ gconstpointer key)
2161
+{
2162
+ QTreeNode *node;
2163
+ gint cmp;
2164
+
2165
+ node = tree->root;
2166
+ if (!node) {
2167
+ return NULL;
2168
+ }
2169
+
2170
+ while (1) {
2171
+ cmp = tree->key_compare(key, node->key, tree->key_compare_data);
2172
+ if (cmp == 0) {
2173
+ return node;
2174
+ } else if (cmp < 0) {
2175
+ if (!node->left_child) {
2176
+ return NULL;
2177
+ }
2178
+
2179
+ node = node->left;
2180
+ } else {
2181
+ if (!node->right_child) {
2182
+ return NULL;
2183
+ }
2184
+
2185
+ node = node->right;
2186
+ }
2187
+ }
2188
+}
2189
+
2190
+static QTreeNode *
2191
+q_tree_node_search(QTreeNode *node,
2192
+ GCompareFunc search_func,
2193
+ gconstpointer data)
2194
+{
2195
+ gint dir;
2196
+
2197
+ if (!node) {
2198
+ return NULL;
2199
+ }
2200
+
2201
+ while (1) {
2202
+ dir = (*search_func)(node->key, data);
2203
+ if (dir == 0) {
2204
+ return node;
2205
+ } else if (dir < 0) {
2206
+ if (!node->left_child) {
2207
+ return NULL;
2208
+ }
2209
+
2210
+ node = node->left;
2211
+ } else {
2212
+ if (!node->right_child) {
2213
+ return NULL;
2214
+ }
2215
+
2216
+ node = node->right;
2217
+ }
2218
+ }
2219
+}
2220
+
2221
+static QTreeNode *
2222
+q_tree_node_rotate_left(QTreeNode *node)
2223
+{
2224
+ QTreeNode *right;
2225
+ gint a_bal;
2226
+ gint b_bal;
2227
+
2228
+ right = node->right;
2229
+
2230
+ if (right->left_child) {
2231
+ node->right = right->left;
2232
+ } else {
2233
+ node->right_child = FALSE;
2234
+ right->left_child = TRUE;
2235
+ }
2236
+ right->left = node;
2237
+
2238
+ a_bal = node->balance;
2239
+ b_bal = right->balance;
2240
+
2241
+ if (b_bal <= 0) {
2242
+ if (a_bal >= 1) {
2243
+ right->balance = b_bal - 1;
2244
+ } else {
2245
+ right->balance = a_bal + b_bal - 2;
2246
+ }
2247
+ node->balance = a_bal - 1;
2248
+ } else {
2249
+ if (a_bal <= b_bal) {
2250
+ right->balance = a_bal - 2;
2251
+ } else {
2252
+ right->balance = b_bal - 1;
2253
+ }
2254
+ node->balance = a_bal - b_bal - 1;
2255
+ }
2256
+
2257
+ return right;
2258
+}
2259
+
2260
+static QTreeNode *
2261
+q_tree_node_rotate_right(QTreeNode *node)
2262
+{
2263
+ QTreeNode *left;
2264
+ gint a_bal;
2265
+ gint b_bal;
2266
+
2267
+ left = node->left;
2268
+
2269
+ if (left->right_child) {
2270
+ node->left = left->right;
2271
+ } else {
2272
+ node->left_child = FALSE;
2273
+ left->right_child = TRUE;
2274
+ }
2275
+ left->right = node;
2276
+
2277
+ a_bal = node->balance;
2278
+ b_bal = left->balance;
2279
+
2280
+ if (b_bal <= 0) {
2281
+ if (b_bal > a_bal) {
2282
+ left->balance = b_bal + 1;
2283
+ } else {
2284
+ left->balance = a_bal + 2;
2285
+ }
2286
+ node->balance = a_bal - b_bal + 1;
2287
+ } else {
2288
+ if (a_bal <= -1) {
2289
+ left->balance = b_bal + 1;
2290
+ } else {
2291
+ left->balance = a_bal + b_bal + 2;
2292
+ }
2293
+ node->balance = a_bal + 1;
2294
+ }
2295
+
2296
+ return left;
2297
+}
2298
+
2299
+#ifdef Q_TREE_DEBUG
2300
+static gint
2301
+q_tree_node_height(QTreeNode *node)
2302
+{
2303
+ gint left_height;
2304
+ gint right_height;
2305
+
2306
+ if (node) {
2307
+ left_height = 0;
2308
+ right_height = 0;
2309
+
2310
+ if (node->left_child) {
2311
+ left_height = q_tree_node_height(node->left);
2312
+ }
2313
+
2314
+ if (node->right_child) {
2315
+ right_height = q_tree_node_height(node->right);
2316
+ }
2317
+
2318
+ return MAX(left_height, right_height) + 1;
2319
+ }
2320
+
2321
+ return 0;
2322
+}
2323
+
2324
+static void q_tree_node_check(QTreeNode *node)
2325
+{
2326
+ gint left_height;
2327
+ gint right_height;
2328
+ gint balance;
2329
+ QTreeNode *tmp;
2330
+
2331
+ if (node) {
2332
+ if (node->left_child) {
2333
+ tmp = q_tree_node_previous(node);
2334
+ g_assert(tmp->right == node);
2335
+ }
2336
+
2337
+ if (node->right_child) {
2338
+ tmp = q_tree_node_next(node);
2339
+ g_assert(tmp->left == node);
2340
+ }
2341
+
2342
+ left_height = 0;
2343
+ right_height = 0;
2344
+
2345
+ if (node->left_child) {
2346
+ left_height = q_tree_node_height(node->left);
2347
+ }
2348
+ if (node->right_child) {
2349
+ right_height = q_tree_node_height(node->right);
2350
+ }
2351
+
2352
+ balance = right_height - left_height;
2353
+ g_assert(balance == node->balance);
2354
+
2355
+ if (node->left_child) {
2356
+ q_tree_node_check(node->left);
2357
+ }
2358
+ if (node->right_child) {
2359
+ q_tree_node_check(node->right);
2360
+ }
2361
+ }
2362
+}
2363
+#endif
2364
diff --git a/tests/bench/meson.build b/tests/bench/meson.build
2365
index XXXXXXX..XXXXXXX 100644
2366
--- a/tests/bench/meson.build
2367
+++ b/tests/bench/meson.build
2368
@@ -XXX,XX +XXX,XX @@ xbzrle_bench = executable('xbzrle-bench',
2369
dependencies: [qemuutil,migration])
2370
endif
2371
2372
+qtree_bench = executable('qtree-bench',
2373
+ sources: 'qtree-bench.c',
2374
+ dependencies: [qemuutil])
2375
+
2376
executable('atomic_add-bench',
2377
sources: files('atomic_add-bench.c'),
2378
dependencies: [qemuutil],
216
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
2379
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
217
index XXXXXXX..XXXXXXX 100644
2380
index XXXXXXX..XXXXXXX 100644
218
--- a/tests/unit/meson.build
2381
--- a/tests/unit/meson.build
219
+++ b/tests/unit/meson.build
2382
+++ b/tests/unit/meson.build
220
@@ -XXX,XX +XXX,XX @@ tests = {
2383
@@ -XXX,XX +XXX,XX @@ tests = {
221
# all code tested by test-x86-cpuid is inside topology.h
2384
'test-rcu-slist': [],
222
'test-x86-cpuid': [],
2385
'test-qdist': [],
223
'test-cutils': [],
2386
'test-qht': [],
224
+ 'test-div128': [],
2387
+ 'test-qtree': [],
225
'test-shift128': [],
2388
'test-bitops': [],
226
'test-mul64': [],
2389
'test-bitcnt': [],
227
# all code tested by test-int128 is inside int128.h
2390
'test-qgraph': ['../qtest/libqos/qgraph.c'],
2391
diff --git a/util/meson.build b/util/meson.build
2392
index XXXXXXX..XXXXXXX 100644
2393
--- a/util/meson.build
2394
+++ b/util/meson.build
2395
@@ -XXX,XX +XXX,XX @@ util_ss.add(when: 'CONFIG_WIN32', if_true: files('oslib-win32.c'))
2396
util_ss.add(when: 'CONFIG_WIN32', if_true: files('qemu-thread-win32.c'))
2397
util_ss.add(when: 'CONFIG_WIN32', if_true: winmm)
2398
util_ss.add(when: 'CONFIG_WIN32', if_true: pathcch)
2399
+util_ss.add(when: 'HAVE_GLIB_WITH_SLICE_ALLOCATOR', if_true: files('qtree.c'))
2400
util_ss.add(files('envlist.c', 'path.c', 'module.c'))
2401
util_ss.add(files('host-utils.c'))
2402
util_ss.add(files('bitmap.c', 'bitops.c'))
228
--
2403
--
229
2.25.1
2404
2.34.1
230
2405
231
2406
diff view generated by jsdifflib
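As a quick illustration of the interface the previous patch brings in: QTree deliberately mirrors GLib's GTree calls, so a minimal user looks like the unit test above. The sketch below is not from the series; the key type and comparison function are just an example, echoing the tests' my_compare().

```
#include "qemu/osdep.h"
#include "qemu/qtree.h"

/* Order keys as single characters, in the spirit of my_compare() above. */
static gint chr_cmp(gconstpointer a, gconstpointer b)
{
    return *(const char *)a - *(const char *)b;
}

static void qtree_example(void)
{
    static char keys[] = "qemu";          /* four distinct characters */
    QTree *tree = q_tree_new(chr_cmp);
    char c = 'm';
    char *v;

    /* Insert key/value pairs; re-inserting an existing key replaces its value. */
    for (int i = 0; keys[i]; i++) {
        q_tree_insert(tree, &keys[i], &keys[i]);
    }

    /* Lookup is O(log n) and returns NULL when the key is absent. */
    v = q_tree_lookup(tree, &c);
    g_assert(v != NULL && *v == 'm');
    g_assert(q_tree_nnodes(tree) == 4);

    /* Frees the tree; with q_tree_new_full() it would free keys/values too. */
    q_tree_destroy(tree);
}
```

q_tree_ref()/q_tree_unref(), q_tree_steal(), q_tree_foreach() and q_tree_search() likewise follow their GTree counterparts.
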
From: Emilio Cota <cota@braap.org>

qemu-user can hang in a multi-threaded fork. One common
reason is that when creating a TB, between fork and exec
we manipulate a GTree whose memory allocator (GSlice) is
not fork-safe.

Although POSIX does not mandate it, the system's allocator
(e.g. tcmalloc, libc malloc) is probably fork-safe.

Fix some of these hangs by using QTree, which uses the system's
allocator regardless of the Glib version that we used at
configuration time.

Tested with the test program in the original bug report, i.e.:
```

void garble() {
int pid = fork();
if (pid == 0) {
exit(0);
} else {
int wstatus;
waitpid(pid, &wstatus, 0);
}
}

void supragarble(unsigned depth) {
if (depth == 0)
return ;

std::thread a(supragarble, depth-1);
std::thread b(supragarble, depth-1);
garble();
a.join();
b.join();
}

int main() {
supragarble(10);
}
```

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/285
Reported-by: Valentin David <me@valentindavid.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Emilio Cota <cota@braap.org>
Message-Id: <20230205163758.416992-3-cota@braap.org>
[rth: Add QEMU_DISABLE_CFI for all callback using functions.]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/tb-maint.c | 17 +++++++++--------
tcg/region.c | 19 ++++++++++---------
util/qtree.c | 8 ++++----
3 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
15
index XXXXXXX..XXXXXXX 100644
58
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
59
--- a/accel/tcg/tb-maint.c
17
+++ b/tcg/optimize.c
60
+++ b/accel/tcg/tb-maint.c
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
@@ -XXX,XX +XXX,XX @@
19
62
20
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
63
#include "qemu/osdep.h"
21
uint64_t z_mask, partmask, affected, tmp;
64
#include "qemu/interval-tree.h"
22
- int nb_oargs, nb_iargs;
65
+#include "qemu/qtree.h"
23
TCGOpcode opc = op->opc;
66
#include "exec/cputlb.h"
24
const TCGOpDef *def;
67
#include "exec/log.h"
25
68
#include "exec/exec-all.h"
26
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
@@ -XXX,XX +XXX,XX @@ struct page_entry {
27
}
70
* See also: page_collection_lock().
28
71
*/
29
def = &tcg_op_defs[opc];
72
struct page_collection {
30
- nb_oargs = def->nb_oargs;
73
- GTree *tree;
31
- nb_iargs = def->nb_iargs;
74
+ QTree *tree;
32
- init_arguments(&ctx, op, nb_oargs + nb_iargs);
75
struct page_entry *max;
33
- copy_propagate(&ctx, op, nb_oargs, nb_iargs);
76
};
34
+ init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
77
35
+ copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
78
@@ -XXX,XX +XXX,XX @@ static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
36
79
struct page_entry *pe;
37
/* For commutative operations make constant second argument */
80
PageDesc *pd;
38
switch (opc) {
81
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
82
- pe = g_tree_lookup(set->tree, &index);
40
83
+ pe = q_tree_lookup(set->tree, &index);
41
CASE_OP_32_64(qemu_ld):
84
if (pe) {
42
{
85
return false;
43
- MemOpIdx oi = op->args[nb_oargs + nb_iargs];
86
}
44
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
87
@@ -XXX,XX +XXX,XX @@ static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
45
MemOp mop = get_memop(oi);
88
}
46
if (!(mop & MO_SIGN)) {
89
47
z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
90
pe = page_entry_new(pd, index);
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
91
- g_tree_insert(set->tree, &pe->index, pe);
49
}
92
+ q_tree_insert(set->tree, &pe->index, pe);
50
93
51
if (partmask == 0) {
94
/*
52
- tcg_debug_assert(nb_oargs == 1);
95
* If this is either (1) the first insertion or (2) a page whose index
53
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
96
@@ -XXX,XX +XXX,XX @@ static struct page_collection *page_collection_lock(tb_page_addr_t start,
97
end >>= TARGET_PAGE_BITS;
98
g_assert(start <= end);
99
100
- set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
101
+ set->tree = q_tree_new_full(tb_page_addr_cmp, NULL, NULL,
102
page_entry_destroy);
103
set->max = NULL;
104
assert_no_pages_locked();
105
106
retry:
107
- g_tree_foreach(set->tree, page_entry_lock, NULL);
108
+ q_tree_foreach(set->tree, page_entry_lock, NULL);
109
110
for (index = start; index <= end; index++) {
111
TranslationBlock *tb;
112
@@ -XXX,XX +XXX,XX @@ static struct page_collection *page_collection_lock(tb_page_addr_t start,
54
continue;
113
continue;
55
}
114
}
56
if (affected == 0) {
115
if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
57
- tcg_debug_assert(nb_oargs == 1);
116
- g_tree_foreach(set->tree, page_entry_unlock, NULL);
58
tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
117
+ q_tree_foreach(set->tree, page_entry_unlock, NULL);
59
continue;
118
goto retry;
60
}
119
}
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
120
assert_page_locked(pd);
62
} else if (args_are_copies(op->args[1], op->args[2])) {
121
@@ -XXX,XX +XXX,XX @@ static struct page_collection *page_collection_lock(tb_page_addr_t start,
63
op->opc = INDEX_op_dup_vec;
122
(tb_page_addr1(tb) != -1 &&
64
TCGOP_VECE(op) = MO_32;
123
page_trylock_add(set, tb_page_addr1(tb)))) {
65
- nb_iargs = 1;
124
/* drop all locks, and reacquire in order */
125
- g_tree_foreach(set->tree, page_entry_unlock, NULL);
126
+ q_tree_foreach(set->tree, page_entry_unlock, NULL);
127
goto retry;
66
}
128
}
67
break;
129
}
68
130
@@ -XXX,XX +XXX,XX @@ static struct page_collection *page_collection_lock(tb_page_addr_t start,
69
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
131
static void page_collection_unlock(struct page_collection *set)
70
op->opc = opc = (opc == INDEX_op_movcond_i32
132
{
71
? INDEX_op_setcond_i32
133
/* entries are unlocked and freed via page_entry_destroy */
72
: INDEX_op_setcond_i64);
134
- g_tree_destroy(set->tree);
73
- nb_iargs = 2;
135
+ q_tree_destroy(set->tree);
74
}
136
g_free(set);
75
break;
137
}
76
138
77
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
139
diff --git a/tcg/region.c b/tcg/region.c
78
if (def->flags & TCG_OPF_BB_END) {
140
index XXXXXXX..XXXXXXX 100644
79
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
141
--- a/tcg/region.c
80
} else {
142
+++ b/tcg/region.c
81
+ int nb_oargs = def->nb_oargs;
143
@@ -XXX,XX +XXX,XX @@
82
for (i = 0; i < nb_oargs; i++) {
144
#include "qemu/mprotect.h"
83
reset_temp(op->args[i]);
145
#include "qemu/memalign.h"
84
/* Save the corresponding known-zero bits mask for the
146
#include "qemu/cacheinfo.h"
147
+#include "qemu/qtree.h"
148
#include "qapi/error.h"
149
#include "exec/exec-all.h"
150
#include "tcg/tcg.h"
151
@@ -XXX,XX +XXX,XX @@
152
153
struct tcg_region_tree {
154
QemuMutex lock;
155
- GTree *tree;
156
+ QTree *tree;
157
/* padding to avoid false sharing is computed at run-time */
158
};
159
160
@@ -XXX,XX +XXX,XX @@ static void tcg_region_trees_init(void)
161
struct tcg_region_tree *rt = region_trees + i * tree_size;
162
163
qemu_mutex_init(&rt->lock);
164
- rt->tree = g_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
165
+ rt->tree = q_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
166
}
167
}
168
169
@@ -XXX,XX +XXX,XX @@ void tcg_tb_insert(TranslationBlock *tb)
170
171
g_assert(rt != NULL);
172
qemu_mutex_lock(&rt->lock);
173
- g_tree_insert(rt->tree, &tb->tc, tb);
174
+ q_tree_insert(rt->tree, &tb->tc, tb);
175
qemu_mutex_unlock(&rt->lock);
176
}
177
178
@@ -XXX,XX +XXX,XX @@ void tcg_tb_remove(TranslationBlock *tb)
179
180
g_assert(rt != NULL);
181
qemu_mutex_lock(&rt->lock);
182
- g_tree_remove(rt->tree, &tb->tc);
183
+ q_tree_remove(rt->tree, &tb->tc);
184
qemu_mutex_unlock(&rt->lock);
185
}
186
187
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
188
}
189
190
qemu_mutex_lock(&rt->lock);
191
- tb = g_tree_lookup(rt->tree, &s);
192
+ tb = q_tree_lookup(rt->tree, &s);
193
qemu_mutex_unlock(&rt->lock);
194
return tb;
195
}
196
@@ -XXX,XX +XXX,XX @@ void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
197
for (i = 0; i < region.n; i++) {
198
struct tcg_region_tree *rt = region_trees + i * tree_size;
199
200
- g_tree_foreach(rt->tree, func, user_data);
201
+ q_tree_foreach(rt->tree, func, user_data);
202
}
203
tcg_region_tree_unlock_all();
204
}
205
@@ -XXX,XX +XXX,XX @@ size_t tcg_nb_tbs(void)
206
for (i = 0; i < region.n; i++) {
207
struct tcg_region_tree *rt = region_trees + i * tree_size;
208
209
- nb_tbs += g_tree_nnodes(rt->tree);
210
+ nb_tbs += q_tree_nnodes(rt->tree);
211
}
212
tcg_region_tree_unlock_all();
213
return nb_tbs;
214
@@ -XXX,XX +XXX,XX @@ static void tcg_region_tree_reset_all(void)
215
struct tcg_region_tree *rt = region_trees + i * tree_size;
216
217
/* Increment the refcount first so that destroy acts as a reset */
218
- g_tree_ref(rt->tree);
219
- g_tree_destroy(rt->tree);
220
+ q_tree_ref(rt->tree);
221
+ q_tree_destroy(rt->tree);
222
}
223
tcg_region_tree_unlock_all();
224
}
225
diff --git a/util/qtree.c b/util/qtree.c
226
index XXXXXXX..XXXXXXX 100644
227
--- a/util/qtree.c
228
+++ b/util/qtree.c
229
@@ -XXX,XX +XXX,XX @@ q_tree_node_next(QTreeNode *node)
230
*
231
* Since: 2.70 in GLib. Internal in Qtree, i.e. not in the public API.
232
*/
233
-static void
234
+static void QEMU_DISABLE_CFI
235
q_tree_remove_all(QTree *tree)
236
{
237
QTreeNode *node;
238
@@ -XXX,XX +XXX,XX @@ q_tree_replace(QTree *tree,
239
}
240
241
/* internal insert routine */
242
-static QTreeNode *
243
+static QTreeNode * QEMU_DISABLE_CFI
244
q_tree_insert_internal(QTree *tree,
245
gpointer key,
246
gpointer value,
247
@@ -XXX,XX +XXX,XX @@ q_tree_steal(QTree *tree,
248
}
249
250
/* internal remove routine */
251
-static gboolean
252
+static gboolean QEMU_DISABLE_CFI
253
q_tree_remove_internal(QTree *tree,
254
gconstpointer key,
255
gboolean steal)
256
@@ -XXX,XX +XXX,XX @@ q_tree_node_balance(QTreeNode *node)
257
return node;
258
}
259
260
-static QTreeNode *
261
+static QTreeNode * QEMU_DISABLE_CFI
262
q_tree_find_node(QTree *tree,
263
gconstpointer key)
264
{
85
--
265
--
86
2.25.1
266
2.34.1
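
As an aside, the hunks above are a mechanical one-for-one substitution of the
GTree entry points with their QTree counterparts. A minimal standalone sketch
of the pattern, assuming the QTree API keeps the GTree signatures (the
demo_cmp/demo_visit helpers are invented for illustration):

    #include "qemu/osdep.h"
    #include "qemu/qtree.h"

    /* Hypothetical comparator: order keys as pointers to long. */
    static gint demo_cmp(gconstpointer a, gconstpointer b, gpointer udata)
    {
        const long *ka = a, *kb = b;
        return *ka < *kb ? -1 : *ka > *kb;
    }

    /* Hypothetical visitor: returning FALSE keeps the traversal going. */
    static gboolean demo_visit(gpointer key, gpointer value, gpointer udata)
    {
        return FALSE;
    }

    static void qtree_demo(void)
    {
        /* Same shape as the q_tree_* calls in the hunks above. */
        QTree *t = q_tree_new_full(demo_cmp, NULL, NULL, g_free);
        long key = 42;

        q_tree_insert(t, &key, g_strdup("value"));
        (void)q_tree_lookup(t, &key);
        q_tree_foreach(t, demo_visit, NULL);
        q_tree_destroy(t);   /* frees the values via g_free */
    }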
87
267
88
268
1
For constant shifts, we can simply shift the s_mask.
1
We have been enforcing host page alignment for the non-R
2
fallback of MAX_RESERVED_VA, but failing to enforce for -R.
2
3
3
For variable shifts, we know that sar does not reduce
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
4
the s_mask, which helps for sequences like
5
6
ext32s_i64 t, in
7
sar_i64 t, t, v
8
ext32s_i64 out, t
9
10
allowing the final extend to be eliminated.
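
As a standalone illustration of why the final extend is dead (plain C,
relying on the usual arithmetic behaviour of >> on signed values rather
than on TCG itself):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t t = (int32_t)0x80001234;   /* ext32s_i64 t, in   */
        int64_t s = t >> 3;                /* sar_i64    t, t, v */
        int64_t o = (int32_t)s;            /* ext32s_i64 out, t  */

        /* sar cannot reduce the repeated sign bits, so the second
         * extension changes nothing: prints 1. */
        printf("%d\n", s == o);
        return 0;
    }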
11
12
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
13
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
---
6
---
16
tcg/optimize.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
7
linux-user/main.c | 6 ++++++
17
1 file changed, 47 insertions(+), 3 deletions(-)
8
1 file changed, 6 insertions(+)
18
9
19
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/linux-user/main.c b/linux-user/main.c
20
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
21
--- a/tcg/optimize.c
12
--- a/linux-user/main.c
22
+++ b/tcg/optimize.c
13
+++ b/linux-user/main.c
23
@@ -XXX,XX +XXX,XX @@ static uint64_t smask_from_zmask(uint64_t zmask)
14
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
24
return ~(~0ull >> rep);
15
*/
25
}
16
max_reserved_va = MAX_RESERVED_VA(cpu);
26
17
if (reserved_va != 0) {
27
+/*
18
+ if (reserved_va % qemu_host_page_size) {
28
+ * Recreate a properly left-aligned smask after manipulation.
19
+ char *s = size_to_str(qemu_host_page_size);
29
+ * Some bit-shuffling, particularly shifts and rotates, may
20
+ fprintf(stderr, "Reserved virtual address not aligned mod %s\n", s);
30
+ * retain sign bits on the left, but may scatter disconnected
21
+ g_free(s);
31
+ * sign bits on the right. Retain only what remains to the left.
22
+ exit(EXIT_FAILURE);
32
+ */
33
+static uint64_t smask_from_smask(int64_t smask)
34
+{
35
+ /* Only the 1 bits are significant for smask */
36
+ return smask_from_zmask(~smask);
37
+}
38
+
39
static inline TempOptInfo *ts_info(TCGTemp *ts)
40
{
41
return ts->state_ptr;
42
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
43
44
static bool fold_shift(OptContext *ctx, TCGOp *op)
45
{
46
+ uint64_t s_mask, z_mask, sign;
47
+
48
if (fold_const2(ctx, op) ||
49
fold_ix_to_i(ctx, op, 0) ||
50
fold_xi_to_x(ctx, op, 0)) {
51
return true;
52
}
53
54
+ s_mask = arg_info(op->args[1])->s_mask;
55
+ z_mask = arg_info(op->args[1])->z_mask;
56
+
57
if (arg_is_const(op->args[2])) {
58
- ctx->z_mask = do_constant_folding(op->opc, ctx->type,
59
- arg_info(op->args[1])->z_mask,
60
- arg_info(op->args[2])->val);
61
+ int sh = arg_info(op->args[2])->val;
62
+
63
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
64
+
65
+ s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
66
+ ctx->s_mask = smask_from_smask(s_mask);
67
+
68
return fold_masks(ctx, op);
69
}
70
+
71
+ switch (op->opc) {
72
+ CASE_OP_32_64(sar):
73
+ /*
74
+ * Arithmetic right shift will not reduce the number of
75
+ * input sign repetitions.
76
+ */
77
+ ctx->s_mask = s_mask;
78
+ break;
79
+ CASE_OP_32_64(shr):
80
+ /*
81
+ * If the sign bit is known zero, then logical right shift
82
+ * will not reduce the number of input sign repetitions.
83
+ */
84
+ sign = (s_mask & -s_mask) >> 1;
85
+ if (!(z_mask & sign)) {
86
+ ctx->s_mask = s_mask;
87
+ }
23
+ }
88
+ break;
24
if (max_reserved_va && reserved_va > max_reserved_va) {
89
+ default:
25
fprintf(stderr, "Reserved virtual address too big\n");
90
+ break;
26
exit(EXIT_FAILURE);
91
+ }
92
+
93
return false;
94
}
95
96
--
27
--
97
2.25.1
28
2.34.1
98
29
99
30
1
Copy z_mask into OptContext, for writeback to the
1
Pass the address of the last byte to be changed, rather than
2
first output within the new function.
2
the first address past the last byte. This avoids overflow
3
3
when the last page of the address space is involved.
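
A small standalone illustration of the overflow being avoided, using a
hypothetical 32-bit guest address type:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t start = 0xfffff000u;       /* last page of the space */
        uint32_t len   = 0x1000u;
        uint32_t end   = start + len;       /* wraps to 0             */
        uint32_t last  = start + len - 1;   /* 0xffffffff, still fine */

        /* Any "addr < end" style loop silently does nothing once end
         * has wrapped; an inclusive [start, last] range has no such
         * edge case. */
        printf("end=0x%08x last=0x%08x\n", end, last);
        return 0;
    }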
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1528
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
---
8
tcg/optimize.c | 49 +++++++++++++++++++++++++++++++++----------------
9
include/exec/cpu-all.h | 2 +-
9
1 file changed, 33 insertions(+), 16 deletions(-)
10
accel/tcg/user-exec.c | 16 +++++++---------
10
11
bsd-user/mmap.c | 6 +++---
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
linux-user/elfload.c | 11 ++++++-----
12
index XXXXXXX..XXXXXXX 100644
13
linux-user/mmap.c | 16 ++++++++--------
13
--- a/tcg/optimize.c
14
linux-user/syscall.c | 4 ++--
14
+++ b/tcg/optimize.c
15
6 files changed, 27 insertions(+), 28 deletions(-)
15
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
16
16
TCGContext *tcg;
17
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
17
TCGOp *prev_mb;
18
index XXXXXXX..XXXXXXX 100644
18
TCGTempSet temps_used;
19
--- a/include/exec/cpu-all.h
19
+
20
+++ b/include/exec/cpu-all.h
20
+ /* In flight values from optimization. */
21
@@ -XXX,XX +XXX,XX @@ typedef int (*walk_memory_regions_fn)(void *, target_ulong,
21
+ uint64_t z_mask;
22
int walk_memory_regions(void *, walk_memory_regions_fn);
22
} OptContext;
23
23
24
int page_get_flags(target_ulong address);
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
25
-void page_set_flags(target_ulong start, target_ulong end, int flags);
25
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
26
+void page_set_flags(target_ulong start, target_ulong last, int flags);
26
}
27
void page_reset_target_data(target_ulong start, target_ulong end);
27
}
28
int page_check_range(target_ulong start, target_ulong len, int flags);
28
29
29
+static void finish_folding(OptContext *ctx, TCGOp *op)
30
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
30
+{
31
index XXXXXXX..XXXXXXX 100644
31
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
32
--- a/accel/tcg/user-exec.c
32
+ int i, nb_oargs;
33
+++ b/accel/tcg/user-exec.c
33
+
34
@@ -XXX,XX +XXX,XX @@ static bool pageflags_set_clear(target_ulong start, target_ulong last,
34
+ /*
35
* The flag PAGE_WRITE_ORG is positioned automatically depending
35
+ * For an opcode that ends a BB, reset all temp data.
36
* on PAGE_WRITE. The mmap_lock should already be held.
36
+ * We do no cross-BB optimization.
37
*/
37
+ */
38
-void page_set_flags(target_ulong start, target_ulong end, int flags)
38
+ if (def->flags & TCG_OPF_BB_END) {
39
+void page_set_flags(target_ulong start, target_ulong last, int flags)
39
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
40
+ ctx->prev_mb = NULL;
41
+ return;
42
+ }
43
+
44
+ nb_oargs = def->nb_oargs;
45
+ for (i = 0; i < nb_oargs; i++) {
46
+ reset_temp(op->args[i]);
47
+ /*
48
+ * Save the corresponding known-zero bits mask for the
49
+ * first output argument (only one supported so far).
50
+ */
51
+ if (i == 0) {
52
+ arg_info(op->args[i])->z_mask = ctx->z_mask;
53
+ }
54
+ }
55
+}
56
+
57
static bool fold_call(OptContext *ctx, TCGOp *op)
58
{
40
{
59
TCGContext *s = ctx->tcg;
41
- target_ulong last;
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
42
bool reset = false;
61
partmask &= 0xffffffffu;
43
bool inval_tb = false;
62
affected &= 0xffffffffu;
44
63
}
45
/* This function should never be called with addresses outside the
64
+ ctx.z_mask = z_mask;
46
guest address space. If this assert fires, it probably indicates
65
47
a missing call to h2g_valid. */
66
if (partmask == 0) {
48
- assert(start < end);
67
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
49
- assert(end - 1 <= GUEST_ADDR_MAX);
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
50
+ assert(start <= last);
51
+ assert(last <= GUEST_ADDR_MAX);
52
/* Only set PAGE_ANON with new mappings. */
53
assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
54
assert_memory_lock();
55
56
- start = start & TARGET_PAGE_MASK;
57
- end = TARGET_PAGE_ALIGN(end);
58
- last = end - 1;
59
+ start &= TARGET_PAGE_MASK;
60
+ last |= ~TARGET_PAGE_MASK;
61
62
if (!(flags & PAGE_VALID)) {
63
flags = 0;
64
@@ -XXX,XX +XXX,XX @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
65
}
66
67
if (!flags || reset) {
68
- page_reset_target_data(start, end);
69
+ page_reset_target_data(start, last + 1);
70
inval_tb |= pageflags_unset(start, last);
71
}
72
if (flags) {
73
@@ -XXX,XX +XXX,XX @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
74
~(reset ? 0 : PAGE_STICKY));
75
}
76
if (inval_tb) {
77
- tb_invalidate_phys_range(start, end);
78
+ tb_invalidate_phys_range(start, last + 1);
79
}
80
}
81
82
diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
83
index XXXXXXX..XXXXXXX 100644
84
--- a/bsd-user/mmap.c
85
+++ b/bsd-user/mmap.c
86
@@ -XXX,XX +XXX,XX @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot)
87
if (ret != 0)
88
goto error;
89
}
90
- page_set_flags(start, start + len, prot | PAGE_VALID);
91
+ page_set_flags(start, start + len - 1, prot | PAGE_VALID);
92
mmap_unlock();
93
return 0;
94
error:
95
@@ -XXX,XX +XXX,XX @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
96
}
97
}
98
the_end1:
99
- page_set_flags(start, start + len, prot | PAGE_VALID);
100
+ page_set_flags(start, start + len - 1, prot | PAGE_VALID);
101
the_end:
102
#ifdef DEBUG_MMAP
103
printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
104
@@ -XXX,XX +XXX,XX @@ int target_munmap(abi_ulong start, abi_ulong len)
105
}
106
107
if (ret == 0) {
108
- page_set_flags(start, start + len, 0);
109
+ page_set_flags(start, start + len - 1, 0);
110
}
111
mmap_unlock();
112
return ret;
113
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
114
index XXXXXXX..XXXXXXX 100644
115
--- a/linux-user/elfload.c
116
+++ b/linux-user/elfload.c
117
@@ -XXX,XX +XXX,XX @@ static bool init_guest_commpage(void)
118
exit(EXIT_FAILURE);
119
}
120
page_set_flags(TARGET_VSYSCALL_PAGE,
121
- TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE,
122
+ TARGET_VSYSCALL_PAGE | ~TARGET_PAGE_MASK,
123
PAGE_EXEC | PAGE_VALID);
124
return true;
125
}
126
@@ -XXX,XX +XXX,XX @@ static bool init_guest_commpage(void)
127
exit(EXIT_FAILURE);
128
}
129
130
- page_set_flags(commpage, commpage + qemu_host_page_size,
131
+ page_set_flags(commpage, commpage | ~qemu_host_page_mask,
132
PAGE_READ | PAGE_EXEC | PAGE_VALID);
133
return true;
134
}
135
@@ -XXX,XX +XXX,XX @@ static bool init_guest_commpage(void)
136
exit(EXIT_FAILURE);
137
}
138
139
- page_set_flags(LO_COMMPAGE, LO_COMMPAGE + TARGET_PAGE_SIZE,
140
+ page_set_flags(LO_COMMPAGE, LO_COMMPAGE | ~TARGET_PAGE_MASK,
141
PAGE_READ | PAGE_EXEC | PAGE_VALID);
142
return true;
143
}
144
@@ -XXX,XX +XXX,XX @@ static bool init_guest_commpage(void)
145
* and implement syscalls. Here, simply mark the page executable.
146
* Special case the entry points during translation (see do_page_zero).
147
*/
148
- page_set_flags(LO_COMMPAGE, LO_COMMPAGE + TARGET_PAGE_SIZE,
149
+ page_set_flags(LO_COMMPAGE, LO_COMMPAGE | ~TARGET_PAGE_MASK,
150
PAGE_EXEC | PAGE_VALID);
151
return true;
152
}
153
@@ -XXX,XX +XXX,XX @@ static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot)
154
155
/* Ensure that the bss page(s) are valid */
156
if ((page_get_flags(last_bss-1) & prot) != prot) {
157
- page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss, prot | PAGE_VALID);
158
+ page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss - 1,
159
+ prot | PAGE_VALID);
160
}
161
162
if (host_start < host_map_start) {
163
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
164
index XXXXXXX..XXXXXXX 100644
165
--- a/linux-user/mmap.c
166
+++ b/linux-user/mmap.c
167
@@ -XXX,XX +XXX,XX @@ int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
168
}
169
}
170
171
- page_set_flags(start, start + len, page_flags);
172
+ page_set_flags(start, start + len - 1, page_flags);
173
ret = 0;
174
175
error:
176
@@ -XXX,XX +XXX,XX @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
177
}
178
page_flags |= PAGE_RESET;
179
if (passthrough_start == passthrough_end) {
180
- page_set_flags(start, start + len, page_flags);
181
+ page_set_flags(start, start + len - 1, page_flags);
182
} else {
183
if (start < passthrough_start) {
184
- page_set_flags(start, passthrough_start, page_flags);
185
+ page_set_flags(start, passthrough_start - 1, page_flags);
186
}
187
- page_set_flags(passthrough_start, passthrough_end,
188
+ page_set_flags(passthrough_start, passthrough_end - 1,
189
page_flags | PAGE_PASSTHROUGH);
190
if (passthrough_end < start + len) {
191
- page_set_flags(passthrough_end, start + len, page_flags);
192
+ page_set_flags(passthrough_end, start + len - 1, page_flags);
193
}
194
}
195
the_end:
196
@@ -XXX,XX +XXX,XX @@ int target_munmap(abi_ulong start, abi_ulong len)
197
}
198
199
if (ret == 0) {
200
- page_set_flags(start, start + len, 0);
201
+ page_set_flags(start, start + len - 1, 0);
202
}
203
mmap_unlock();
204
return ret;
205
@@ -XXX,XX +XXX,XX @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
206
} else {
207
new_addr = h2g(host_addr);
208
prot = page_get_flags(old_addr);
209
- page_set_flags(old_addr, old_addr + old_size, 0);
210
- page_set_flags(new_addr, new_addr + new_size,
211
+ page_set_flags(old_addr, old_addr + old_size - 1, 0);
212
+ page_set_flags(new_addr, new_addr + new_size - 1,
213
prot | PAGE_VALID | PAGE_RESET);
214
}
215
mmap_unlock();
216
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
217
index XXXXXXX..XXXXXXX 100644
218
--- a/linux-user/syscall.c
219
+++ b/linux-user/syscall.c
220
@@ -XXX,XX +XXX,XX @@ static inline abi_ulong do_shmat(CPUArchState *cpu_env,
221
}
222
raddr=h2g((unsigned long)host_raddr);
223
224
- page_set_flags(raddr, raddr + shm_info.shm_segsz,
225
+ page_set_flags(raddr, raddr + shm_info.shm_segsz - 1,
226
PAGE_VALID | PAGE_RESET | PAGE_READ |
227
(shmflg & SHM_RDONLY ? 0 : PAGE_WRITE));
228
229
@@ -XXX,XX +XXX,XX @@ static inline abi_long do_shmdt(abi_ulong shmaddr)
230
for (i = 0; i < N_SHM_REGIONS; ++i) {
231
if (shm_regions[i].in_use && shm_regions[i].start == shmaddr) {
232
shm_regions[i].in_use = false;
233
- page_set_flags(shmaddr, shmaddr + shm_regions[i].size, 0);
234
+ page_set_flags(shmaddr, shmaddr + shm_regions[i].size - 1, 0);
69
break;
235
break;
70
}
236
}
71
237
}
72
- /* Some of the folding above can change opc. */
73
- opc = op->opc;
74
- def = &tcg_op_defs[opc];
75
- if (def->flags & TCG_OPF_BB_END) {
76
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
77
- } else {
78
- int nb_oargs = def->nb_oargs;
79
- for (i = 0; i < nb_oargs; i++) {
80
- reset_temp(op->args[i]);
81
- /* Save the corresponding known-zero bits mask for the
82
- first output argument (only one supported so far). */
83
- if (i == 0) {
84
- arg_info(op->args[i])->z_mask = z_mask;
85
- }
86
- }
87
- }
88
+ finish_folding(&ctx, op);
89
90
/* Eliminate duplicate and redundant fence instructions. */
91
if (ctx.prev_mb) {
92
--
238
--
93
2.25.1
239
2.34.1
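
As an aside, the new start/last rounding used above ("start &= TARGET_PAGE_MASK;
last |= ~TARGET_PAGE_MASK;") can be checked in isolation; a quick sketch with a
hypothetical 4KiB page size:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define DEMO_PAGE_BITS 12
    #define DEMO_PAGE_MASK ((uint64_t)-1 << DEMO_PAGE_BITS)

    int main(void)
    {
        uint64_t start = 0x12345, last = 0x23456;

        start &= DEMO_PAGE_MASK;    /* round down: 0x12000            */
        last  |= ~DEMO_PAGE_MASK;   /* round up to last byte: 0x23fff */

        /* No addition is involved, so a range ending at the top of the
         * address space cannot overflow the way TARGET_PAGE_ALIGN(end)
         * could. */
        printf("start=0x%" PRIx64 " last=0x%" PRIx64 "\n", start, last);
        return 0;
    }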
94
240
95
241
1
The result is either 0 or 1, which means that we have
1
Pass the address of the last byte to be changed, rather than
2
a 2 bit signed result, and thus 62 bits of sign.
2
the first address past the last byte. This avoids overflow
3
For clarity, use the smask_from_zmask function.
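
Worked through with a stand-in for the series' smask_from_zmask() (clz-based,
zmask assumed non-zero), the 62 bits of sign fall out directly:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for smask_from_zmask(): with clz64(zmask) known-zero top
     * bits, all but one of them repeat the (zero) sign bit. */
    static uint64_t smask_from_zmask_demo(uint64_t zmask)
    {
        int rep = __builtin_clzll(zmask);   /* zmask != 0 assumed */
        return rep ? ~(~0ull >> (rep - 1)) : 0;
    }

    int main(void)
    {
        /* setcond produces 0 or 1, i.e. z_mask == 1. */
        printf("0x%016llx\n",
               (unsigned long long)smask_from_zmask_demo(1));
        /* prints 0xfffffffffffffffc: the top 62 bits repeat the sign. */
        return 0;
    }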
3
when the last page of the address space is involved.
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 2 ++
8
include/exec/cpu-all.h | 2 +-
10
1 file changed, 2 insertions(+)
9
accel/tcg/user-exec.c | 11 +++++------
10
linux-user/mmap.c | 2 +-
11
3 files changed, 7 insertions(+), 8 deletions(-)
11
12
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
--- a/include/exec/cpu-all.h
15
+++ b/tcg/optimize.c
16
+++ b/include/exec/cpu-all.h
16
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
17
@@ -XXX,XX +XXX,XX @@ int walk_memory_regions(void *, walk_memory_regions_fn);
18
19
int page_get_flags(target_ulong address);
20
void page_set_flags(target_ulong start, target_ulong last, int flags);
21
-void page_reset_target_data(target_ulong start, target_ulong end);
22
+void page_reset_target_data(target_ulong start, target_ulong last);
23
int page_check_range(target_ulong start, target_ulong len, int flags);
24
25
/**
26
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/accel/tcg/user-exec.c
29
+++ b/accel/tcg/user-exec.c
30
@@ -XXX,XX +XXX,XX @@ void page_set_flags(target_ulong start, target_ulong last, int flags)
17
}
31
}
18
32
19
ctx->z_mask = 1;
33
if (!flags || reset) {
20
+ ctx->s_mask = smask_from_zmask(1);
34
- page_reset_target_data(start, last + 1);
21
return false;
35
+ page_reset_target_data(start, last);
36
inval_tb |= pageflags_unset(start, last);
37
}
38
if (flags) {
39
@@ -XXX,XX +XXX,XX @@ typedef struct TargetPageDataNode {
40
41
static IntervalTreeRoot targetdata_root;
42
43
-void page_reset_target_data(target_ulong start, target_ulong end)
44
+void page_reset_target_data(target_ulong start, target_ulong last)
45
{
46
IntervalTreeNode *n, *next;
47
- target_ulong last;
48
49
assert_memory_lock();
50
51
- start = start & TARGET_PAGE_MASK;
52
- last = TARGET_PAGE_ALIGN(end) - 1;
53
+ start &= TARGET_PAGE_MASK;
54
+ last |= ~TARGET_PAGE_MASK;
55
56
for (n = interval_tree_iter_first(&targetdata_root, start, last),
57
next = n ? interval_tree_iter_next(n, start, last) : NULL;
58
@@ -XXX,XX +XXX,XX @@ void *page_get_target_data(target_ulong address)
59
return t->data[(page - region) >> TARGET_PAGE_BITS];
22
}
60
}
23
61
#else
24
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
62
-void page_reset_target_data(target_ulong start, target_ulong end) { }
63
+void page_reset_target_data(target_ulong start, target_ulong last) { }
64
#endif /* TARGET_PAGE_DATA_SIZE */
65
66
/* The softmmu versions of these helpers are in cputlb.c. */
67
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
68
index XXXXXXX..XXXXXXX 100644
69
--- a/linux-user/mmap.c
70
+++ b/linux-user/mmap.c
71
@@ -XXX,XX +XXX,XX @@ abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
72
if (can_passthrough_madvise(start, end)) {
73
ret = get_errno(madvise(g2h_untagged(start), len, advice));
74
if ((advice == MADV_DONTNEED) && (ret == 0)) {
75
- page_reset_target_data(start, start + len);
76
+ page_reset_target_data(start, start + len - 1);
77
}
78
}
25
}
79
}
26
27
ctx->z_mask = 1;
28
+ ctx->s_mask = smask_from_zmask(1);
29
return false;
30
31
do_setcond_const:
32
--
80
--
33
2.25.1
81
2.34.1
34
82
35
83
1
Certain targets, like riscv, produce signed 32-bit results.
1
Pass the address of the last byte to be changed, rather than
2
This can lead to lots of redundant extensions as values are
2
the first address past the last byte. This avoids overflow
3
manipulated.
3
when the last page of the address space is involved.
4
4
5
Begin by tracking only the obvious sign-extensions, and
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
converting them to simple copies when possible.
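
For reference, the left-aligned sign mask introduced below can be exercised on
its own; a minimal sketch, where clrsb64_demo is a portable stand-in for the
host-utils clrsb64():

    #include <stdint.h>
    #include <stdio.h>

    /* Count leading redundant sign bits, a stand-in for clrsb64(). */
    static int clrsb64_demo(uint64_t v)
    {
        uint64_t x = v;
        int n = 0;
        while (n < 63 && ((x >> 62) & 1) == ((x >> 63) & 1)) {
            x <<= 1;
            n++;
        }
        return n;
    }

    /* Left-aligned mask of bits guaranteed to equal the sign bit. */
    static uint64_t smask_from_value_demo(uint64_t value)
    {
        int rep = clrsb64_demo(value);
        return ~(~0ull >> rep);
    }

    int main(void)
    {
        /* 0xffffffffffff8000 has 48 redundant sign bits, so the top
         * 48 bits of the result are set: 0xffffffffffff0000. */
        printf("0x%016llx\n", (unsigned long long)
               smask_from_value_demo(0xffffffffffff8000ull));
        return 0;
    }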
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
7
---
12
tcg/optimize.c | 123 ++++++++++++++++++++++++++++++++++++++++---------
8
accel/tcg/tb-maint.c | 28 ++++++++++++++++------------
13
1 file changed, 102 insertions(+), 21 deletions(-)
9
1 file changed, 16 insertions(+), 12 deletions(-)
14
10
15
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
16
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/optimize.c
13
--- a/accel/tcg/tb-maint.c
18
+++ b/tcg/optimize.c
14
+++ b/accel/tcg/tb-maint.c
19
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
15
@@ -XXX,XX +XXX,XX @@ static void tb_remove(TranslationBlock *tb)
20
TCGTemp *next_copy;
16
}
21
uint64_t val;
17
22
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
18
/* TODO: For now, still shared with translate-all.c for system mode. */
23
+ uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
19
-#define PAGE_FOR_EACH_TB(start, end, pagedesc, T, N) \
24
} TempOptInfo;
20
- for (T = foreach_tb_first(start, end), \
25
21
- N = foreach_tb_next(T, start, end); \
26
typedef struct OptContext {
22
+#define PAGE_FOR_EACH_TB(start, last, pagedesc, T, N) \
27
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
23
+ for (T = foreach_tb_first(start, last), \
28
/* In flight values from optimization. */
24
+ N = foreach_tb_next(T, start, last); \
29
uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
25
T != NULL; \
30
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
26
- T = N, N = foreach_tb_next(N, start, end))
31
+ uint64_t s_mask; /* mask of clrsb(value) bits */
27
+ T = N, N = foreach_tb_next(N, start, last))
32
TCGType type;
28
33
} OptContext;
29
typedef TranslationBlock *PageForEachNext;
34
30
35
+/* Calculate the smask for a specific value. */
31
static PageForEachNext foreach_tb_first(tb_page_addr_t start,
36
+static uint64_t smask_from_value(uint64_t value)
32
- tb_page_addr_t end)
37
+{
33
+ tb_page_addr_t last)
38
+ int rep = clrsb64(value);
39
+ return ~(~0ull >> rep);
40
+}
41
+
42
+/*
43
+ * Calculate the smask for a given set of known-zeros.
44
+ * If there are lots of zeros on the left, we can consider the remainder
45
+ * an unsigned field, and thus the corresponding signed field is one bit
46
+ * larger.
47
+ */
48
+static uint64_t smask_from_zmask(uint64_t zmask)
49
+{
50
+ /*
51
+ * Only the 0 bits are significant for zmask, thus the msb itself
52
+ * must be zero, else we have no sign information.
53
+ */
54
+ int rep = clz64(zmask);
55
+ if (rep == 0) {
56
+ return 0;
57
+ }
58
+ rep -= 1;
59
+ return ~(~0ull >> rep);
60
+}
61
+
62
static inline TempOptInfo *ts_info(TCGTemp *ts)
63
{
34
{
64
return ts->state_ptr;
35
- IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, end - 1);
65
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
36
+ IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, last);
66
ti->prev_copy = ts;
37
return n ? container_of(n, TranslationBlock, itree) : NULL;
67
ti->is_const = false;
68
ti->z_mask = -1;
69
+ ti->s_mask = 0;
70
}
38
}
71
39
72
static void reset_temp(TCGArg arg)
40
static PageForEachNext foreach_tb_next(PageForEachNext tb,
73
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
41
tb_page_addr_t start,
74
ti->is_const = true;
42
- tb_page_addr_t end)
75
ti->val = ts->val;
43
+ tb_page_addr_t last)
76
ti->z_mask = ts->val;
44
{
77
+ ti->s_mask = smask_from_value(ts->val);
45
IntervalTreeNode *n;
78
} else {
46
79
ti->is_const = false;
47
if (tb) {
80
ti->z_mask = -1;
48
- n = interval_tree_iter_next(&tb->itree, start, end - 1);
81
+ ti->s_mask = 0;
49
+ n = interval_tree_iter_next(&tb->itree, start, last);
50
if (n) {
51
return container_of(n, TranslationBlock, itree);
52
}
53
@@ -XXX,XX +XXX,XX @@ struct page_collection {
54
};
55
56
typedef int PageForEachNext;
57
-#define PAGE_FOR_EACH_TB(start, end, pagedesc, tb, n) \
58
+#define PAGE_FOR_EACH_TB(start, last, pagedesc, tb, n) \
59
TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
60
61
#ifdef CONFIG_DEBUG_TCG
62
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
63
{
64
TranslationBlock *tb;
65
PageForEachNext n;
66
+ tb_page_addr_t last = end - 1;
67
68
assert_memory_lock();
69
70
- PAGE_FOR_EACH_TB(start, end, unused, tb, n) {
71
+ PAGE_FOR_EACH_TB(start, last, unused, tb, n) {
72
tb_phys_invalidate__locked(tb);
82
}
73
}
83
}
74
}
84
75
@@ -XXX,XX +XXX,XX @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
85
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
76
bool current_tb_modified;
86
op->args[1] = src;
77
TranslationBlock *tb;
87
78
PageForEachNext n;
88
di->z_mask = si->z_mask;
79
+ tb_page_addr_t last;
89
+ di->s_mask = si->s_mask;
90
91
if (src_ts->type == dst_ts->type) {
92
TempOptInfo *ni = ts_info(si->next_copy);
93
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
94
95
nb_oargs = def->nb_oargs;
96
for (i = 0; i < nb_oargs; i++) {
97
- reset_temp(op->args[i]);
98
+ TCGTemp *ts = arg_temp(op->args[i]);
99
+ reset_ts(ts);
100
/*
101
- * Save the corresponding known-zero bits mask for the
102
+ * Save the corresponding known-zero/sign bits mask for the
103
* first output argument (only one supported so far).
104
*/
105
if (i == 0) {
106
- arg_info(op->args[i])->z_mask = ctx->z_mask;
107
+ ts_info(ts)->z_mask = ctx->z_mask;
108
+ ts_info(ts)->s_mask = ctx->s_mask;
109
}
110
}
111
}
112
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
113
{
114
uint64_t a_mask = ctx->a_mask;
115
uint64_t z_mask = ctx->z_mask;
116
+ uint64_t s_mask = ctx->s_mask;
117
80
118
/*
81
/*
119
* 32-bit ops generate 32-bit results, which for the purpose of
82
* Without precise smc semantics, or when outside of a TB,
120
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
83
@@ -XXX,XX +XXX,XX @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
121
if (ctx->type == TCG_TYPE_I32) {
84
assert_memory_lock();
122
a_mask = (int32_t)a_mask;
85
current_tb = tcg_tb_lookup(pc);
123
z_mask = (int32_t)z_mask;
86
124
+ s_mask |= MAKE_64BIT_MASK(32, 32);
87
+ last = addr | ~TARGET_PAGE_MASK;
125
ctx->z_mask = z_mask;
88
addr &= TARGET_PAGE_MASK;
126
+ ctx->s_mask = s_mask;
89
current_tb_modified = false;
127
}
90
128
91
- PAGE_FOR_EACH_TB(addr, addr + TARGET_PAGE_SIZE, unused, tb, n) {
129
if (z_mask == 0) {
92
+ PAGE_FOR_EACH_TB(addr, last, unused, tb, n) {
130
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
93
if (current_tb == tb &&
131
94
(tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
132
static bool fold_bswap(OptContext *ctx, TCGOp *op)
95
/*
133
{
96
@@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
134
- uint64_t z_mask, sign;
97
bool current_tb_modified = false;
135
+ uint64_t z_mask, s_mask, sign;
98
TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
136
99
#endif /* TARGET_HAS_PRECISE_SMC */
137
if (arg_is_const(op->args[1])) {
100
+ tb_page_addr_t last G_GNUC_UNUSED = end - 1;
138
uint64_t t = arg_info(op->args[1])->val;
101
139
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
102
/*
140
}
103
* We remove all the TBs in the range [start, end[.
141
104
* XXX: see if in some cases it could be faster to invalidate all the code
142
z_mask = arg_info(op->args[1])->z_mask;
105
*/
143
+
106
- PAGE_FOR_EACH_TB(start, end, p, tb, n) {
144
switch (op->opc) {
107
+ PAGE_FOR_EACH_TB(start, last, p, tb, n) {
145
case INDEX_op_bswap16_i32:
108
/* NOTE: this is subtle as a TB may span two physical pages */
146
case INDEX_op_bswap16_i64:
109
if (n == 0) {
147
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
110
/* NOTE: tb_end may be after the end of the page, but
148
default:
149
g_assert_not_reached();
150
}
151
+ s_mask = smask_from_zmask(z_mask);
152
153
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
154
case TCG_BSWAP_OZ:
155
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
156
/* If the sign bit may be 1, force all the bits above to 1. */
157
if (z_mask & sign) {
158
z_mask |= sign;
159
+ s_mask = sign << 1;
160
}
161
break;
162
default:
163
/* The high bits are undefined: force all bits above the sign to 1. */
164
z_mask |= sign << 1;
165
+ s_mask = 0;
166
break;
167
}
168
ctx->z_mask = z_mask;
169
+ ctx->s_mask = s_mask;
170
171
return fold_masks(ctx, op);
172
}
173
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
174
static bool fold_extract(OptContext *ctx, TCGOp *op)
175
{
176
uint64_t z_mask_old, z_mask;
177
+ int pos = op->args[2];
178
+ int len = op->args[3];
179
180
if (arg_is_const(op->args[1])) {
181
uint64_t t;
182
183
t = arg_info(op->args[1])->val;
184
- t = extract64(t, op->args[2], op->args[3]);
185
+ t = extract64(t, pos, len);
186
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
187
}
188
189
z_mask_old = arg_info(op->args[1])->z_mask;
190
- z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
191
- if (op->args[2] == 0) {
192
+ z_mask = extract64(z_mask_old, pos, len);
193
+ if (pos == 0) {
194
ctx->a_mask = z_mask_old ^ z_mask;
195
}
196
ctx->z_mask = z_mask;
197
+ ctx->s_mask = smask_from_zmask(z_mask);
198
199
return fold_masks(ctx, op);
200
}
201
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
202
203
static bool fold_exts(OptContext *ctx, TCGOp *op)
204
{
205
- uint64_t z_mask_old, z_mask, sign;
206
+ uint64_t s_mask_old, s_mask, z_mask, sign;
207
bool type_change = false;
208
209
if (fold_const1(ctx, op)) {
210
return true;
211
}
212
213
- z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
214
+ z_mask = arg_info(op->args[1])->z_mask;
215
+ s_mask = arg_info(op->args[1])->s_mask;
216
+ s_mask_old = s_mask;
217
218
switch (op->opc) {
219
CASE_OP_32_64(ext8s):
220
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
221
222
if (z_mask & sign) {
223
z_mask |= sign;
224
- } else if (!type_change) {
225
- ctx->a_mask = z_mask_old ^ z_mask;
226
}
227
+ s_mask |= sign << 1;
228
+
229
ctx->z_mask = z_mask;
230
+ ctx->s_mask = s_mask;
231
+ if (!type_change) {
232
+ ctx->a_mask = s_mask & ~s_mask_old;
233
+ }
234
235
return fold_masks(ctx, op);
236
}
237
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
238
}
239
240
ctx->z_mask = z_mask;
241
+ ctx->s_mask = smask_from_zmask(z_mask);
242
if (!type_change) {
243
ctx->a_mask = z_mask_old ^ z_mask;
244
}
245
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
246
MemOp mop = get_memop(oi);
247
int width = 8 * memop_size(mop);
248
249
- if (!(mop & MO_SIGN) && width < 64) {
250
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
251
+ if (width < 64) {
252
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
253
+ if (!(mop & MO_SIGN)) {
254
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
255
+ ctx->s_mask <<= 1;
256
+ }
257
}
258
259
/* Opcodes that touch guest memory stop the mb optimization. */
260
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
261
262
static bool fold_sextract(OptContext *ctx, TCGOp *op)
263
{
264
- int64_t z_mask_old, z_mask;
265
+ uint64_t z_mask, s_mask, s_mask_old;
266
+ int pos = op->args[2];
267
+ int len = op->args[3];
268
269
if (arg_is_const(op->args[1])) {
270
uint64_t t;
271
272
t = arg_info(op->args[1])->val;
273
- t = sextract64(t, op->args[2], op->args[3]);
274
+ t = sextract64(t, pos, len);
275
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
276
}
277
278
- z_mask_old = arg_info(op->args[1])->z_mask;
279
- z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
280
- if (op->args[2] == 0 && z_mask >= 0) {
281
- ctx->a_mask = z_mask_old ^ z_mask;
282
- }
283
+ z_mask = arg_info(op->args[1])->z_mask;
284
+ z_mask = sextract64(z_mask, pos, len);
285
ctx->z_mask = z_mask;
286
287
+ s_mask_old = arg_info(op->args[1])->s_mask;
288
+ s_mask = sextract64(s_mask_old, pos, len);
289
+ s_mask |= MAKE_64BIT_MASK(len, 64 - len);
290
+ ctx->s_mask = s_mask;
291
+
292
+ if (pos == 0) {
293
+ ctx->a_mask = s_mask & ~s_mask_old;
294
+ }
295
+
296
return fold_masks(ctx, op);
297
}
298
299
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
300
{
301
/* We can't do any folding with a load, but we can record bits. */
302
switch (op->opc) {
303
+ CASE_OP_32_64(ld8s):
304
+ ctx->s_mask = MAKE_64BIT_MASK(8, 56);
305
+ break;
306
CASE_OP_32_64(ld8u):
307
ctx->z_mask = MAKE_64BIT_MASK(0, 8);
308
+ ctx->s_mask = MAKE_64BIT_MASK(9, 55);
309
+ break;
310
+ CASE_OP_32_64(ld16s):
311
+ ctx->s_mask = MAKE_64BIT_MASK(16, 48);
312
break;
313
CASE_OP_32_64(ld16u):
314
ctx->z_mask = MAKE_64BIT_MASK(0, 16);
315
+ ctx->s_mask = MAKE_64BIT_MASK(17, 47);
316
+ break;
317
+ case INDEX_op_ld32s_i64:
318
+ ctx->s_mask = MAKE_64BIT_MASK(32, 32);
319
break;
320
case INDEX_op_ld32u_i64:
321
ctx->z_mask = MAKE_64BIT_MASK(0, 32);
322
+ ctx->s_mask = MAKE_64BIT_MASK(33, 31);
323
break;
324
default:
325
g_assert_not_reached();
326
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
327
ctx.type = TCG_TYPE_I32;
328
}
329
330
- /* Assume all bits affected, and no bits known zero. */
331
+ /* Assume all bits affected, no bits known zero, no sign reps. */
332
ctx.a_mask = -1;
333
ctx.z_mask = -1;
334
+ ctx.s_mask = 0;
335
336
/*
337
* Process each opcode.
338
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
339
case INDEX_op_extrh_i64_i32:
340
done = fold_extu(&ctx, op);
341
break;
342
+ CASE_OP_32_64(ld8s):
343
CASE_OP_32_64(ld8u):
344
+ CASE_OP_32_64(ld16s):
345
CASE_OP_32_64(ld16u):
346
+ case INDEX_op_ld32s_i64:
347
case INDEX_op_ld32u_i64:
348
done = fold_tcg_ld(&ctx, op);
349
break;
350
--
111
--
351
2.25.1
112
2.34.1
352
113
353
114
1
Adjust the interface to take the OptContext parameter instead
1
Pass the address of the last byte to be changed, rather than
2
of TCGContext or both.
2
the first address past the last byte. This avoids overflow
3
when the last page of the address space is involved.
3
4
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Fixes a bug in the loop comparison where "<= end" would lock
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
one more page than required.
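
The off-by-one is easy to see with concrete numbers; a standalone sketch using
a hypothetical 4KiB page:

    #include <stdio.h>

    #define DEMO_PAGE_BITS 12

    int main(void)
    {
        /* Bytes [0x1000, 0x2fff] live on pages 1 and 2. */
        unsigned long start = 0x1000, end = 0x3000, last = 0x2fff;

        /* Old: end >> bits == 3, so "index <= end" also locks page 3. */
        for (unsigned long i = start >> DEMO_PAGE_BITS;
             i <= (end >> DEMO_PAGE_BITS); i++) {
            printf("old locks page %lu\n", i);
        }

        /* New: last >> bits == 2, exactly pages 1 and 2. */
        for (unsigned long i = start >> DEMO_PAGE_BITS;
             i <= (last >> DEMO_PAGE_BITS); i++) {
            printf("new locks page %lu\n", i);
        }
        return 0;
    }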
7
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
10
---
8
tcg/optimize.c | 67 +++++++++++++++++++++++++-------------------------
11
accel/tcg/tb-maint.c | 22 +++++++++++-----------
9
1 file changed, 34 insertions(+), 33 deletions(-)
12
1 file changed, 11 insertions(+), 11 deletions(-)
10
13
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
16
--- a/accel/tcg/tb-maint.c
14
+++ b/tcg/optimize.c
17
+++ b/accel/tcg/tb-maint.c
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
18
@@ -XXX,XX +XXX,XX @@ static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
16
} TempOptInfo;
17
18
typedef struct OptContext {
19
+ TCGContext *tcg;
20
TCGTempSet temps_used;
21
} OptContext;
22
23
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
24
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
25
}
19
}
26
20
27
-static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
21
/*
28
+static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
22
- * Lock a range of pages ([@start,@end[) as well as the pages of all
23
+ * Lock a range of pages ([@start,@last]) as well as the pages of all
24
* intersecting TBs.
25
* Locking order: acquire locks in ascending order of page index.
26
*/
27
static struct page_collection *page_collection_lock(tb_page_addr_t start,
28
- tb_page_addr_t end)
29
+ tb_page_addr_t last)
29
{
30
{
30
TCGTemp *dst_ts = arg_temp(dst);
31
struct page_collection *set = g_malloc(sizeof(*set));
31
TCGTemp *src_ts = arg_temp(src);
32
tb_page_addr_t index;
32
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
33
PageDesc *pd;
33
TCGOpcode new_op;
34
34
35
start >>= TARGET_PAGE_BITS;
35
if (ts_are_copies(dst_ts, src_ts)) {
36
- end >>= TARGET_PAGE_BITS;
36
- tcg_op_remove(s, op);
37
- g_assert(start <= end);
37
+ tcg_op_remove(ctx->tcg, op);
38
+ last >>= TARGET_PAGE_BITS;
38
return;
39
+ g_assert(start <= last);
40
41
set->tree = q_tree_new_full(tb_page_addr_cmp, NULL, NULL,
42
page_entry_destroy);
43
@@ -XXX,XX +XXX,XX @@ static struct page_collection *page_collection_lock(tb_page_addr_t start,
44
retry:
45
q_tree_foreach(set->tree, page_entry_lock, NULL);
46
47
- for (index = start; index <= end; index++) {
48
+ for (index = start; index <= last; index++) {
49
TranslationBlock *tb;
50
PageForEachNext n;
51
52
@@ -XXX,XX +XXX,XX @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
53
void tb_invalidate_phys_page(tb_page_addr_t addr)
54
{
55
struct page_collection *pages;
56
- tb_page_addr_t start, end;
57
+ tb_page_addr_t start, last;
58
PageDesc *p;
59
60
p = page_find(addr >> TARGET_PAGE_BITS);
61
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_page(tb_page_addr_t addr)
39
}
62
}
40
63
41
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
64
start = addr & TARGET_PAGE_MASK;
42
}
65
- end = start + TARGET_PAGE_SIZE;
66
- pages = page_collection_lock(start, end);
67
- tb_invalidate_phys_page_range__locked(pages, p, start, end, 0);
68
+ last = addr | ~TARGET_PAGE_MASK;
69
+ pages = page_collection_lock(start, last);
70
+ tb_invalidate_phys_page_range__locked(pages, p, start, last + 1, 0);
71
page_collection_unlock(pages);
43
}
72
}
44
73
45
-static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
74
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
46
- TCGOp *op, TCGArg dst, uint64_t val)
75
struct page_collection *pages;
47
+static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
76
tb_page_addr_t next;
48
+ TCGArg dst, uint64_t val)
77
78
- pages = page_collection_lock(start, end);
79
+ pages = page_collection_lock(start, end - 1);
80
for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
81
start < end;
82
start = next, next += TARGET_PAGE_SIZE) {
83
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
49
{
84
{
50
const TCGOpDef *def = &tcg_op_defs[op->opc];
85
struct page_collection *pages;
51
TCGType type;
86
52
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
87
- pages = page_collection_lock(ram_addr, ram_addr + size);
53
/* Convert movi to mov with constant temp. */
88
+ pages = page_collection_lock(ram_addr, ram_addr + size - 1);
54
tv = tcg_constant_internal(type, val);
89
tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr);
55
init_ts_info(ctx, tv);
90
page_collection_unlock(pages);
56
- tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
57
+ tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
58
}
91
}
59
60
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
{
63
int nb_temps, nb_globals, i;
64
TCGOp *op, *op_next, *prev_mb = NULL;
65
- OptContext ctx = {};
66
+ OptContext ctx = { .tcg = s };
67
68
/* Array VALS has an element for each temp.
69
If this temp holds a constant then its value is kept in VALS' element.
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
71
CASE_OP_32_64(rotr):
72
if (arg_is_const(op->args[1])
73
&& arg_info(op->args[1])->val == 0) {
74
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
76
continue;
77
}
78
break;
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
if (!arg_is_const(op->args[1])
81
&& arg_is_const(op->args[2])
82
&& arg_info(op->args[2])->val == 0) {
83
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
84
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
85
continue;
86
}
87
break;
88
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
89
if (!arg_is_const(op->args[1])
90
&& arg_is_const(op->args[2])
91
&& arg_info(op->args[2])->val == -1) {
92
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
93
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
94
continue;
95
}
96
break;
97
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
98
99
if (partmask == 0) {
100
tcg_debug_assert(nb_oargs == 1);
101
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
102
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
103
continue;
104
}
105
if (affected == 0) {
106
tcg_debug_assert(nb_oargs == 1);
107
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
108
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
109
continue;
110
}
111
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
CASE_OP_32_64(mulsh):
114
if (arg_is_const(op->args[2])
115
&& arg_info(op->args[2])->val == 0) {
116
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
117
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
118
continue;
119
}
120
break;
121
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
122
CASE_OP_32_64_VEC(or):
123
CASE_OP_32_64_VEC(and):
124
if (args_are_copies(op->args[1], op->args[2])) {
125
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
126
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
127
continue;
128
}
129
break;
130
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
131
CASE_OP_32_64_VEC(sub):
132
CASE_OP_32_64_VEC(xor):
133
if (args_are_copies(op->args[1], op->args[2])) {
134
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
135
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
136
continue;
137
}
138
break;
139
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
140
allocator where needed and possible. Also detect copies. */
141
switch (opc) {
142
CASE_OP_32_64_VEC(mov):
143
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
144
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
145
continue;
146
147
case INDEX_op_dup_vec:
148
if (arg_is_const(op->args[1])) {
149
tmp = arg_info(op->args[1])->val;
150
tmp = dup_const(TCGOP_VECE(op), tmp);
151
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
152
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
153
continue;
154
}
155
break;
156
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
157
case INDEX_op_dup2_vec:
158
assert(TCG_TARGET_REG_BITS == 32);
159
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
160
- tcg_opt_gen_movi(s, &ctx, op, op->args[0],
161
+ tcg_opt_gen_movi(&ctx, op, op->args[0],
162
deposit64(arg_info(op->args[1])->val, 32, 32,
163
arg_info(op->args[2])->val));
164
continue;
165
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
166
case INDEX_op_extrh_i64_i32:
167
if (arg_is_const(op->args[1])) {
168
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
169
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
171
continue;
172
}
173
break;
174
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
175
if (arg_is_const(op->args[1])) {
176
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
177
op->args[2]);
178
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
180
continue;
181
}
182
break;
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
185
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
186
arg_info(op->args[2])->val);
187
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
189
continue;
190
}
191
break;
192
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
193
TCGArg v = arg_info(op->args[1])->val;
194
if (v != 0) {
195
tmp = do_constant_folding(opc, v, 0);
196
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
198
} else {
199
- tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
200
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
201
}
202
continue;
203
}
204
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
205
tmp = deposit64(arg_info(op->args[1])->val,
206
op->args[3], op->args[4],
207
arg_info(op->args[2])->val);
208
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
209
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
210
continue;
211
}
212
break;
213
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
214
if (arg_is_const(op->args[1])) {
215
tmp = extract64(arg_info(op->args[1])->val,
216
op->args[2], op->args[3]);
217
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
218
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
219
continue;
220
}
221
break;
222
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
223
if (arg_is_const(op->args[1])) {
224
tmp = sextract64(arg_info(op->args[1])->val,
225
op->args[2], op->args[3]);
226
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
227
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
228
continue;
229
}
230
break;
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
tmp = (int32_t)(((uint32_t)v1 >> shr) |
233
((uint32_t)v2 << (32 - shr)));
234
}
235
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
236
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
237
continue;
238
}
239
break;
240
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
241
tmp = do_constant_folding_cond(opc, op->args[1],
242
op->args[2], op->args[3]);
243
if (tmp != 2) {
244
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
245
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
246
continue;
247
}
248
break;
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
250
tmp = do_constant_folding_cond(opc, op->args[1],
251
op->args[2], op->args[5]);
252
if (tmp != 2) {
253
- tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
254
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
255
continue;
256
}
257
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
258
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
259
260
rl = op->args[0];
261
rh = op->args[1];
262
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
263
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
264
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
265
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
266
continue;
267
}
268
break;
269
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
270
271
rl = op->args[0];
272
rh = op->args[1];
273
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
274
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
275
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
276
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
277
continue;
278
}
279
break;
280
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
281
op->args[5]);
282
if (tmp != 2) {
283
do_setcond_const:
284
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
285
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
286
continue;
287
}
288
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
289
--
92
--
290
2.25.1
93
2.34.1
291
94
292
95
1
Most of these are handled by creating a fold_const2_commutative
1
Pass the address of the last byte to be changed, rather than
2
to handle all of the binary operators. The rest were already
2
the first address past the last byte. This avoids overflow
3
handled on a case-by-case basis in the switch, and have their
3
when the last page of the address space is involved.
4
own fold function in which to place the call.
5
4
6
We now have only one major switch on TCGOpcode.
5
Properly truncate tb_last to the end of the page; the comment about
6
tb_end being allowed to run past the end of the page is not correct
7
once overflow is considered.
7
8
8
Introduce NO_DEST and a block comment for swap_commutative in
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
order to make the handling of brcond and movcond opcodes cleaner.
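
A toy model of the swap_commutative() contract being documented here (the Arg
struct and helpers are invented for the sketch; the real code also weighs the
constant-ness of both operands):

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { int id; bool is_const; } Arg;

    /* Prefer a constant in the second slot, and prefer the destination
     * in the first slot; report whether a swap happened so the caller
     * can swap or invert the condition, as fold_brcond()/fold_movcond()
     * do in the patch below. */
    static bool swap_commutative_demo(int dest, Arg *p1, Arg *p2)
    {
        bool swap = (p1->is_const && !p2->is_const) || p2->id == dest;
        if (swap) {
            Arg tmp = *p1;
            *p1 = *p2;
            *p2 = tmp;
        }
        return swap;
    }

    int main(void)
    {
        Arg a = { .id = 7, .is_const = true };
        Arg b = { .id = 3, .is_const = false };

        /* NO_DEST modelled as -1: only constant-ness drives the swap. */
        bool swapped = swap_commutative_demo(-1, &a, &b);
        printf("swapped=%d first.id=%d second.id=%d\n",
               swapped, a.id, b.id);
        return 0;
    }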
10
11
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
11
---
14
tcg/optimize.c | 142 ++++++++++++++++++++++++-------------------------
12
accel/tcg/tb-maint.c | 26 ++++++++++++--------------
15
1 file changed, 70 insertions(+), 72 deletions(-)
13
1 file changed, 12 insertions(+), 14 deletions(-)
16
14
17
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
18
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
19
--- a/tcg/optimize.c
17
--- a/accel/tcg/tb-maint.c
20
+++ b/tcg/optimize.c
18
+++ b/accel/tcg/tb-maint.c
21
@@ -XXX,XX +XXX,XX @@ static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
19
@@ -XXX,XX +XXX,XX @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
22
return -1;
20
static void
21
tb_invalidate_phys_page_range__locked(struct page_collection *pages,
22
PageDesc *p, tb_page_addr_t start,
23
- tb_page_addr_t end,
24
+ tb_page_addr_t last,
25
uintptr_t retaddr)
26
{
27
TranslationBlock *tb;
28
- tb_page_addr_t tb_start, tb_end;
29
PageForEachNext n;
30
#ifdef TARGET_HAS_PRECISE_SMC
31
bool current_tb_modified = false;
32
TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
33
#endif /* TARGET_HAS_PRECISE_SMC */
34
- tb_page_addr_t last G_GNUC_UNUSED = end - 1;
35
36
/*
37
- * We remove all the TBs in the range [start, end[.
38
+ * We remove all the TBs in the range [start, last].
39
* XXX: see if in some cases it could be faster to invalidate all the code
40
*/
41
PAGE_FOR_EACH_TB(start, last, p, tb, n) {
42
+ tb_page_addr_t tb_start, tb_last;
43
+
44
/* NOTE: this is subtle as a TB may span two physical pages */
45
+ tb_start = tb_page_addr0(tb);
46
+ tb_last = tb_start + tb->size - 1;
47
if (n == 0) {
48
- /* NOTE: tb_end may be after the end of the page, but
49
- it is not a problem */
50
- tb_start = tb_page_addr0(tb);
51
- tb_end = tb_start + tb->size;
52
+ tb_last = MIN(tb_last, tb_start | ~TARGET_PAGE_MASK);
53
} else {
54
tb_start = tb_page_addr1(tb);
55
- tb_end = tb_start + ((tb_page_addr0(tb) + tb->size)
56
- & ~TARGET_PAGE_MASK);
57
+ tb_last = tb_start + (tb_last & ~TARGET_PAGE_MASK);
58
}
59
- if (!(tb_end <= start || tb_start >= end)) {
60
+ if (!(tb_last < start || tb_start > last)) {
61
#ifdef TARGET_HAS_PRECISE_SMC
62
if (current_tb == tb &&
63
(tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
64
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_page(tb_page_addr_t addr)
65
start = addr & TARGET_PAGE_MASK;
66
last = addr | ~TARGET_PAGE_MASK;
67
pages = page_collection_lock(start, last);
68
- tb_invalidate_phys_page_range__locked(pages, p, start, last + 1, 0);
69
+ tb_invalidate_phys_page_range__locked(pages, p, start, last, 0);
70
page_collection_unlock(pages);
23
}
71
}
24
72
25
+/**
73
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
26
+ * swap_commutative:
74
continue;
27
+ * @dest: TCGArg of the destination argument, or NO_DEST.
75
}
28
+ * @p1: first paired argument
76
assert_page_locked(pd);
29
+ * @p2: second paired argument
77
- tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
30
+ *
78
+ tb_invalidate_phys_page_range__locked(pages, pd, start, bound - 1, 0);
31
+ * If *@p1 is a constant and *@p2 is not, swap.
79
}
32
+ * If *@p2 matches @dest, swap.
80
page_collection_unlock(pages);
33
+ * Return true if a swap was performed.
34
+ */
35
+
36
+#define NO_DEST temp_arg(NULL)
37
+
38
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
39
{
40
TCGArg a1 = *p1, a2 = *p2;
41
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
42
return false;
43
}
81
}
44
82
@@ -XXX,XX +XXX,XX @@ static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages,
45
+static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
46
+{
47
+ swap_commutative(op->args[0], &op->args[1], &op->args[2]);
48
+ return fold_const2(ctx, op);
49
+}
50
+
51
static bool fold_masks(OptContext *ctx, TCGOp *op)
52
{
53
uint64_t a_mask = ctx->a_mask;
54
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
55
56
static bool fold_add(OptContext *ctx, TCGOp *op)
57
{
58
- if (fold_const2(ctx, op) ||
59
+ if (fold_const2_commutative(ctx, op) ||
60
fold_xi_to_x(ctx, op, 0)) {
61
return true;
62
}
83
}
63
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
84
64
85
assert_page_locked(p);
65
static bool fold_add2(OptContext *ctx, TCGOp *op)
86
- tb_invalidate_phys_page_range__locked(pages, p, start, start + len, ra);
66
{
87
+ tb_invalidate_phys_page_range__locked(pages, p, start, start + len - 1, ra);
67
+ /* Note that the high and low parts may be independently swapped. */
68
+ swap_commutative(op->args[0], &op->args[2], &op->args[4]);
69
+ swap_commutative(op->args[1], &op->args[3], &op->args[5]);
70
+
71
return fold_addsub2(ctx, op, true);
72
}
88
}
73
89
74
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
90
/*
75
{
76
uint64_t z1, z2;
77
78
- if (fold_const2(ctx, op) ||
79
+ if (fold_const2_commutative(ctx, op) ||
80
fold_xi_to_i(ctx, op, 0) ||
81
fold_xi_to_x(ctx, op, -1) ||
82
fold_xx_to_x(ctx, op)) {
83
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
84
static bool fold_brcond(OptContext *ctx, TCGOp *op)
85
{
86
TCGCond cond = op->args[2];
87
- int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
88
+ int i;
89
90
+ if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
91
+ op->args[2] = cond = tcg_swap_cond(cond);
92
+ }
93
+
94
+ i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
95
if (i == 0) {
96
tcg_op_remove(ctx->tcg, op);
97
return true;
98
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
99
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
100
{
101
TCGCond cond = op->args[4];
102
- int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
103
TCGArg label = op->args[5];
104
- int inv = 0;
105
+ int i, inv = 0;
106
107
+ if (swap_commutative2(&op->args[0], &op->args[2])) {
108
+ op->args[4] = cond = tcg_swap_cond(cond);
109
+ }
110
+
111
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
112
if (i >= 0) {
113
goto do_brcond_const;
114
}
115
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
116
117
static bool fold_eqv(OptContext *ctx, TCGOp *op)
118
{
119
- if (fold_const2(ctx, op) ||
120
+ if (fold_const2_commutative(ctx, op) ||
121
fold_xi_to_x(ctx, op, -1) ||
122
fold_xi_to_not(ctx, op, 0)) {
123
return true;
124
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
125
static bool fold_movcond(OptContext *ctx, TCGOp *op)
126
{
127
TCGCond cond = op->args[5];
128
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
129
+ int i;
130
131
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
132
+ op->args[5] = cond = tcg_swap_cond(cond);
133
+ }
134
+ /*
135
+ * Canonicalize the "false" input reg to match the destination reg so
136
+ * that the tcg backend can implement a "move if true" operation.
137
+ */
138
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
139
+ op->args[5] = cond = tcg_invert_cond(cond);
140
+ }
141
+
142
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
143
if (i >= 0) {
144
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
145
}
146
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
147
148
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
149
{
150
- if (fold_const2(ctx, op) ||
151
+ if (fold_const2_commutative(ctx, op) ||
152
fold_xi_to_i(ctx, op, 0)) {
153
return true;
154
}
155
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
156
157
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
158
{
159
+ swap_commutative(op->args[0], &op->args[2], &op->args[3]);
160
+
161
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
162
uint64_t a = arg_info(op->args[2])->val;
163
uint64_t b = arg_info(op->args[3])->val;
164
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
165
166
static bool fold_nand(OptContext *ctx, TCGOp *op)
167
{
168
- if (fold_const2(ctx, op) ||
169
+ if (fold_const2_commutative(ctx, op) ||
170
fold_xi_to_not(ctx, op, -1)) {
171
return true;
172
}
173
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
174
175
static bool fold_nor(OptContext *ctx, TCGOp *op)
176
{
177
- if (fold_const2(ctx, op) ||
178
+ if (fold_const2_commutative(ctx, op) ||
179
fold_xi_to_not(ctx, op, 0)) {
180
return true;
181
}
182
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
183
184
static bool fold_or(OptContext *ctx, TCGOp *op)
185
{
186
- if (fold_const2(ctx, op) ||
187
+ if (fold_const2_commutative(ctx, op) ||
188
fold_xi_to_x(ctx, op, 0) ||
189
fold_xx_to_x(ctx, op)) {
190
return true;
191
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
192
static bool fold_setcond(OptContext *ctx, TCGOp *op)
193
{
194
TCGCond cond = op->args[3];
195
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
196
+ int i;
197
198
+ if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
199
+ op->args[3] = cond = tcg_swap_cond(cond);
200
+ }
201
+
202
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
203
if (i >= 0) {
204
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
205
}
206
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
207
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
208
{
209
TCGCond cond = op->args[5];
210
- int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
211
- int inv = 0;
212
+ int i, inv = 0;
213
214
+ if (swap_commutative2(&op->args[1], &op->args[3])) {
215
+ op->args[5] = cond = tcg_swap_cond(cond);
216
+ }
217
+
218
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
219
if (i >= 0) {
220
goto do_setcond_const;
221
}
222
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
223
224
static bool fold_xor(OptContext *ctx, TCGOp *op)
225
{
226
- if (fold_const2(ctx, op) ||
227
+ if (fold_const2_commutative(ctx, op) ||
228
fold_xx_to_i(ctx, op, 0) ||
229
fold_xi_to_x(ctx, op, 0) ||
230
fold_xi_to_not(ctx, op, -1)) {
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
ctx.type = TCG_TYPE_I32;
233
}
234
235
- /* For commutative operations make constant second argument */
236
- switch (opc) {
237
- CASE_OP_32_64_VEC(add):
238
- CASE_OP_32_64_VEC(mul):
239
- CASE_OP_32_64_VEC(and):
240
- CASE_OP_32_64_VEC(or):
241
- CASE_OP_32_64_VEC(xor):
242
- CASE_OP_32_64(eqv):
243
- CASE_OP_32_64(nand):
244
- CASE_OP_32_64(nor):
245
- CASE_OP_32_64(muluh):
246
- CASE_OP_32_64(mulsh):
247
- swap_commutative(op->args[0], &op->args[1], &op->args[2]);
248
- break;
249
- CASE_OP_32_64(brcond):
250
- if (swap_commutative(-1, &op->args[0], &op->args[1])) {
251
- op->args[2] = tcg_swap_cond(op->args[2]);
252
- }
253
- break;
254
- CASE_OP_32_64(setcond):
255
- if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
256
- op->args[3] = tcg_swap_cond(op->args[3]);
257
- }
258
- break;
259
- CASE_OP_32_64(movcond):
260
- if (swap_commutative(-1, &op->args[1], &op->args[2])) {
261
- op->args[5] = tcg_swap_cond(op->args[5]);
262
- }
263
- /* For movcond, we canonicalize the "false" input reg to match
264
- the destination reg so that the tcg backend can implement
265
- a "move if true" operation. */
266
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
267
- op->args[5] = tcg_invert_cond(op->args[5]);
268
- }
269
- break;
270
- CASE_OP_32_64(add2):
271
- swap_commutative(op->args[0], &op->args[2], &op->args[4]);
272
- swap_commutative(op->args[1], &op->args[3], &op->args[5]);
273
- break;
274
- CASE_OP_32_64(mulu2):
275
- CASE_OP_32_64(muls2):
276
- swap_commutative(op->args[0], &op->args[2], &op->args[3]);
277
- break;
278
- case INDEX_op_brcond2_i32:
279
- if (swap_commutative2(&op->args[0], &op->args[2])) {
280
- op->args[4] = tcg_swap_cond(op->args[4]);
281
- }
282
- break;
283
- case INDEX_op_setcond2_i32:
284
- if (swap_commutative2(&op->args[1], &op->args[3])) {
285
- op->args[5] = tcg_swap_cond(op->args[5]);
286
- }
287
- break;
288
- default:
289
- break;
290
- }
291
-
292
/* Assume all bits affected, and no bits known zero. */
293
ctx.a_mask = -1;
294
ctx.z_mask = -1;
295
--
91
--
296
2.25.1
92
2.34.1
297
93
298
94
1
Provide what will become a larger context for splitting
1
Pass the address of the last byte to be changed, rather than
2
the very large tcg_optimize function.
2
the first address past the last byte. This avoids overflow
3
when the last page of the address space is involved.
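
[Not part of the patch -- an illustrative, standalone sketch of the overflow being avoided, assuming a 32-bit tb_page_addr_t and 4 KiB pages; all names below are made up for the example.]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 0x1000u

int main(void)
{
    /* Final page of a 32-bit address space. */
    uint32_t start = UINT32_MAX & ~(EXAMPLE_PAGE_SIZE - 1); /* 0xfffff000 */
    uint32_t end   = start + EXAMPLE_PAGE_SIZE;             /* wraps to 0 */
    uint32_t last  = start + (EXAMPLE_PAGE_SIZE - 1);       /* 0xffffffff */

    /* With an exclusive bound the range looks empty; the inclusive
     * last byte keeps the comparison meaningful. */
    printf("end  = 0x%08" PRIx32 "  start < end   -> %d\n", end, start < end);
    printf("last = 0x%08" PRIx32 "  start <= last -> %d\n", last, start <= last);
    return 0;
}

The interface changes in the diff below (tb_invalidate_phys_range() and the
range helpers taking "last") avoid exactly this wrap.
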
3
4
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 77 ++++++++++++++++++++++++++------------------------
8
include/exec/exec-all.h | 2 +-
10
1 file changed, 40 insertions(+), 37 deletions(-)
9
accel/tcg/tb-maint.c | 31 ++++++++++++++++---------------
10
accel/tcg/translate-all.c | 2 +-
11
accel/tcg/user-exec.c | 2 +-
12
softmmu/physmem.c | 2 +-
13
5 files changed, 20 insertions(+), 19 deletions(-)
11
14
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
17
--- a/include/exec/exec-all.h
15
+++ b/tcg/optimize.c
18
+++ b/include/exec/exec-all.h
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
19
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_addr(target_ulong addr);
17
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
20
void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs);
18
} TempOptInfo;
21
#endif
19
22
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
20
+typedef struct OptContext {
23
-void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end);
21
+ TCGTempSet temps_used;
24
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last);
22
+} OptContext;
25
void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
26
27
/* GETPC is the true target of the return instruction that we'll execute. */
28
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/accel/tcg/tb-maint.c
31
+++ b/accel/tcg/tb-maint.c
32
@@ -XXX,XX +XXX,XX @@ TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
33
* Called with mmap_lock held for user-mode emulation.
34
* NOTE: this function must not be called while a TB is running.
35
*/
36
-void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
37
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
38
{
39
TranslationBlock *tb;
40
PageForEachNext n;
41
- tb_page_addr_t last = end - 1;
42
43
assert_memory_lock();
44
45
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
46
*/
47
void tb_invalidate_phys_page(tb_page_addr_t addr)
48
{
49
- tb_page_addr_t start, end;
50
+ tb_page_addr_t start, last;
51
52
start = addr & TARGET_PAGE_MASK;
53
- end = start + TARGET_PAGE_SIZE;
54
- tb_invalidate_phys_range(start, end);
55
+ last = addr | ~TARGET_PAGE_MASK;
56
+ tb_invalidate_phys_range(start, last);
57
}
58
59
/*
60
@@ -XXX,XX +XXX,XX @@ void tb_invalidate_phys_page(tb_page_addr_t addr)
61
62
/*
63
* Invalidate all TBs which intersect with the target physical address range
64
- * [start;end[. NOTE: start and end may refer to *different* physical pages.
65
+ * [start;last]. NOTE: start and end may refer to *different* physical pages.
66
* 'is_cpu_write_access' should be true if called from a real cpu write
67
* access: the virtual CPU will exit the current TB if code is modified inside
68
* this TB.
69
*/
70
-void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
71
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
72
{
73
struct page_collection *pages;
74
- tb_page_addr_t next;
75
+ tb_page_addr_t index, index_last;
76
77
- pages = page_collection_lock(start, end - 1);
78
- for (next = (start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
79
- start < end;
80
- start = next, next += TARGET_PAGE_SIZE) {
81
- PageDesc *pd = page_find(start >> TARGET_PAGE_BITS);
82
- tb_page_addr_t bound = MIN(next, end);
83
+ pages = page_collection_lock(start, last);
23
+
84
+
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
85
+ index_last = last >> TARGET_PAGE_BITS;
25
{
86
+ for (index = start >> TARGET_PAGE_BITS; index <= index_last; index++) {
26
return ts->state_ptr;
87
+ PageDesc *pd = page_find(index);
27
@@ -XXX,XX +XXX,XX @@ static void reset_temp(TCGArg arg)
88
+ tb_page_addr_t bound;
89
90
if (pd == NULL) {
91
continue;
92
}
93
assert_page_locked(pd);
94
- tb_invalidate_phys_page_range__locked(pages, pd, start, bound - 1, 0);
95
+ bound = (index << TARGET_PAGE_BITS) | ~TARGET_PAGE_MASK;
96
+ bound = MIN(bound, last);
97
+ tb_invalidate_phys_page_range__locked(pages, pd, start, bound, 0);
98
}
99
page_collection_unlock(pages);
28
}
100
}
29
101
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
30
/* Initialize and activate a temporary. */
102
index XXXXXXX..XXXXXXX 100644
31
-static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
103
--- a/accel/tcg/translate-all.c
32
+static void init_ts_info(OptContext *ctx, TCGTemp *ts)
104
+++ b/accel/tcg/translate-all.c
33
{
105
@@ -XXX,XX +XXX,XX @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
34
size_t idx = temp_idx(ts);
106
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
35
TempOptInfo *ti;
107
addr = get_page_addr_code(env, pc);
36
108
if (addr != -1) {
37
- if (test_bit(idx, temps_used->l)) {
109
- tb_invalidate_phys_range(addr, addr + 1);
38
+ if (test_bit(idx, ctx->temps_used.l)) {
110
+ tb_invalidate_phys_range(addr, addr);
39
return;
111
}
40
}
41
- set_bit(idx, temps_used->l);
42
+ set_bit(idx, ctx->temps_used.l);
43
44
ti = ts->state_ptr;
45
if (ti == NULL) {
46
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
47
}
112
}
48
}
113
}
49
114
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
50
-static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
115
index XXXXXXX..XXXXXXX 100644
51
+static void init_arg_info(OptContext *ctx, TCGArg arg)
116
--- a/accel/tcg/user-exec.c
52
{
117
+++ b/accel/tcg/user-exec.c
53
- init_ts_info(temps_used, arg_temp(arg));
118
@@ -XXX,XX +XXX,XX @@ void page_set_flags(target_ulong start, target_ulong last, int flags)
54
+ init_ts_info(ctx, arg_temp(arg));
119
~(reset ? 0 : PAGE_STICKY));
55
}
120
}
56
121
if (inval_tb) {
57
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
122
- tb_invalidate_phys_range(start, last + 1);
58
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
123
+ tb_invalidate_phys_range(start, last);
59
}
124
}
60
}
125
}
61
126
62
-static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
127
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
63
+static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
128
index XXXXXXX..XXXXXXX 100644
64
TCGOp *op, TCGArg dst, uint64_t val)
129
--- a/softmmu/physmem.c
65
{
130
+++ b/softmmu/physmem.c
66
const TCGOpDef *def = &tcg_op_defs[op->opc];
131
@@ -XXX,XX +XXX,XX @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
67
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
68
69
/* Convert movi to mov with constant temp. */
70
tv = tcg_constant_internal(type, val);
71
- init_ts_info(temps_used, tv);
72
+ init_ts_info(ctx, tv);
73
tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
74
}
75
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
77
{
78
int nb_temps, nb_globals, i;
79
TCGOp *op, *op_next, *prev_mb = NULL;
80
- TCGTempSet temps_used;
81
+ OptContext ctx = {};
82
83
/* Array VALS has an element for each temp.
84
If this temp holds a constant then its value is kept in VALS' element.
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
nb_temps = s->nb_temps;
87
nb_globals = s->nb_globals;
88
89
- memset(&temps_used, 0, sizeof(temps_used));
90
for (i = 0; i < nb_temps; ++i) {
91
s->temps[i].state_ptr = NULL;
92
}
132
}
93
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
133
if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
94
for (i = 0; i < nb_oargs + nb_iargs; i++) {
134
assert(tcg_enabled());
95
TCGTemp *ts = arg_temp(op->args[i]);
135
- tb_invalidate_phys_range(addr, addr + length);
96
if (ts) {
136
+ tb_invalidate_phys_range(addr, addr + length - 1);
97
- init_ts_info(&temps_used, ts);
137
dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
98
+ init_ts_info(&ctx, ts);
138
}
99
}
139
cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
100
}
101
} else {
102
nb_oargs = def->nb_oargs;
103
nb_iargs = def->nb_iargs;
104
for (i = 0; i < nb_oargs + nb_iargs; i++) {
105
- init_arg_info(&temps_used, op->args[i]);
106
+ init_arg_info(&ctx, op->args[i]);
107
}
108
}
109
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
CASE_OP_32_64(rotr):
112
if (arg_is_const(op->args[1])
113
&& arg_info(op->args[1])->val == 0) {
114
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
115
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
116
continue;
117
}
118
break;
119
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
120
121
if (partmask == 0) {
122
tcg_debug_assert(nb_oargs == 1);
123
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
124
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
125
continue;
126
}
127
if (affected == 0) {
128
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
129
CASE_OP_32_64(mulsh):
130
if (arg_is_const(op->args[2])
131
&& arg_info(op->args[2])->val == 0) {
132
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
133
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
134
continue;
135
}
136
break;
137
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
138
CASE_OP_32_64_VEC(sub):
139
CASE_OP_32_64_VEC(xor):
140
if (args_are_copies(op->args[1], op->args[2])) {
141
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
142
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
143
continue;
144
}
145
break;
146
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
147
if (arg_is_const(op->args[1])) {
148
tmp = arg_info(op->args[1])->val;
149
tmp = dup_const(TCGOP_VECE(op), tmp);
150
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
151
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
152
break;
153
}
154
goto do_default;
155
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
156
case INDEX_op_dup2_vec:
157
assert(TCG_TARGET_REG_BITS == 32);
158
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
159
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
160
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0],
161
deposit64(arg_info(op->args[1])->val, 32, 32,
162
arg_info(op->args[2])->val));
163
break;
164
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
165
case INDEX_op_extrh_i64_i32:
166
if (arg_is_const(op->args[1])) {
167
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
168
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
169
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
break;
171
}
172
goto do_default;
173
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
174
if (arg_is_const(op->args[1])) {
175
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
176
op->args[2]);
177
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
178
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
break;
180
}
181
goto do_default;
182
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
183
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
184
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
185
arg_info(op->args[2])->val);
186
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
187
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
break;
189
}
190
goto do_default;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
TCGArg v = arg_info(op->args[1])->val;
193
if (v != 0) {
194
tmp = do_constant_folding(opc, v, 0);
195
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
196
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
} else {
198
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
199
}
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
tmp = deposit64(arg_info(op->args[1])->val,
202
op->args[3], op->args[4],
203
arg_info(op->args[2])->val);
204
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
205
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
206
break;
207
}
208
goto do_default;
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
if (arg_is_const(op->args[1])) {
211
tmp = extract64(arg_info(op->args[1])->val,
212
op->args[2], op->args[3]);
213
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
214
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
215
break;
216
}
217
goto do_default;
218
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
219
if (arg_is_const(op->args[1])) {
220
tmp = sextract64(arg_info(op->args[1])->val,
221
op->args[2], op->args[3]);
222
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
223
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
224
break;
225
}
226
goto do_default;
227
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
228
tmp = (int32_t)(((uint32_t)v1 >> shr) |
229
((uint32_t)v2 << (32 - shr)));
230
}
231
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
232
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
233
break;
234
}
235
goto do_default;
236
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
237
tmp = do_constant_folding_cond(opc, op->args[1],
238
op->args[2], op->args[3]);
239
if (tmp != 2) {
240
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
241
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
242
break;
243
}
244
goto do_default;
245
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
246
op->args[1], op->args[2]);
247
if (tmp != 2) {
248
if (tmp) {
249
- memset(&temps_used, 0, sizeof(temps_used));
250
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
251
op->opc = INDEX_op_br;
252
op->args[0] = op->args[3];
253
} else {
254
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
255
256
rl = op->args[0];
257
rh = op->args[1];
258
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
259
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
260
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
261
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
262
break;
263
}
264
goto do_default;
265
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
266
267
rl = op->args[0];
268
rh = op->args[1];
269
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
270
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
271
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
272
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
273
break;
274
}
275
goto do_default;
276
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
277
if (tmp != 2) {
278
if (tmp) {
279
do_brcond_true:
280
- memset(&temps_used, 0, sizeof(temps_used));
281
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
282
op->opc = INDEX_op_br;
283
op->args[0] = op->args[5];
284
} else {
285
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
286
/* Simplify LT/GE comparisons vs zero to a single compare
287
vs the high word of the input. */
288
do_brcond_high:
289
- memset(&temps_used, 0, sizeof(temps_used));
290
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
291
op->opc = INDEX_op_brcond_i32;
292
op->args[0] = op->args[1];
293
op->args[1] = op->args[3];
294
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
295
goto do_default;
296
}
297
do_brcond_low:
298
- memset(&temps_used, 0, sizeof(temps_used));
299
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
300
op->opc = INDEX_op_brcond_i32;
301
op->args[1] = op->args[2];
302
op->args[2] = op->args[4];
303
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
304
op->args[5]);
305
if (tmp != 2) {
306
do_setcond_const:
307
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
308
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
309
} else if ((op->args[5] == TCG_COND_LT
310
|| op->args[5] == TCG_COND_GE)
311
&& arg_is_const(op->args[3])
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
if (!(tcg_call_flags(op)
314
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
315
for (i = 0; i < nb_globals; i++) {
316
- if (test_bit(i, temps_used.l)) {
317
+ if (test_bit(i, ctx.temps_used.l)) {
318
reset_ts(&s->temps[i]);
319
}
320
}
321
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
322
block, otherwise we only trash the output args. "z_mask" is
323
the non-zero bits mask for the first output arg. */
324
if (def->flags & TCG_OPF_BB_END) {
325
- memset(&temps_used, 0, sizeof(temps_used));
326
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
327
} else {
328
do_reset_output:
329
for (i = 0; i < nb_oargs; i++) {
330
--
140
--
331
2.25.1
141
2.34.1
332
142
333
143
1
Prepare for tracking different masks by renaming this one.
1
Pass the address of the last byte of the image, rather than
2
the first address past the last byte. This avoids overflow
3
when the last page of the address space is involved.
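
[Again not part of the patch -- a minimal sketch with made-up numbers of why
the mmap/probe sizes below are derived from the inclusive high address as
hiaddr - loaddr + 1: the one-past-the-end form is unrepresentable when the
image ends at the very top of the address space.]

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Hypothetical image occupying the top 128 KiB of a 32-bit space. */
    uint32_t loaddr = 0xfffe0000u;
    uint32_t hiaddr = 0xffffffffu;                /* last byte of the image */

    uint64_t len = (uint64_t)hiaddr - loaddr + 1; /* 0x20000, exact */
    uint32_t end = loaddr + (uint32_t)len;        /* wraps to 0 */

    printf("len = 0x%" PRIx64 "\n", len);
    printf("one-past-the-end = 0x%08" PRIx32 " (wrapped)\n", end);
    return 0;
}
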
2
4
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 142 +++++++++++++++++++++++++------------------------
7
linux-user/elfload.c | 24 ++++++++++++------------
9
1 file changed, 72 insertions(+), 70 deletions(-)
8
linux-user/flatload.c | 2 +-
9
2 files changed, 13 insertions(+), 13 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/linux-user/elfload.c
14
+++ b/tcg/optimize.c
14
+++ b/linux-user/elfload.c
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
15
@@ -XXX,XX +XXX,XX @@ static void pgb_have_guest_base(const char *image_name, abi_ulong guest_loaddr,
16
TCGTemp *prev_copy;
16
if (guest_hiaddr > reserved_va) {
17
TCGTemp *next_copy;
17
error_report("%s: requires more than reserved virtual "
18
uint64_t val;
18
"address space (0x%" PRIx64 " > 0x%lx)",
19
- uint64_t mask;
19
- image_name, (uint64_t)guest_hiaddr, reserved_va);
20
+ uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
20
+ image_name, (uint64_t)guest_hiaddr + 1, reserved_va);
21
} TempOptInfo;
21
exit(EXIT_FAILURE);
22
23
static inline TempOptInfo *ts_info(TCGTemp *ts)
24
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
25
ti->next_copy = ts;
26
ti->prev_copy = ts;
27
ti->is_const = false;
28
- ti->mask = -1;
29
+ ti->z_mask = -1;
30
}
31
32
static void reset_temp(TCGArg arg)
33
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
34
if (ts->kind == TEMP_CONST) {
35
ti->is_const = true;
36
ti->val = ts->val;
37
- ti->mask = ts->val;
38
+ ti->z_mask = ts->val;
39
if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
40
/* High bits of a 32-bit quantity are garbage. */
41
- ti->mask |= ~0xffffffffull;
42
+ ti->z_mask |= ~0xffffffffull;
43
}
22
}
44
} else {
23
} else {
45
ti->is_const = false;
24
@@ -XXX,XX +XXX,XX @@ static void pgb_have_guest_base(const char *image_name, abi_ulong guest_loaddr,
46
- ti->mask = -1;
25
if ((guest_hiaddr - guest_base) > ~(uintptr_t)0) {
47
+ ti->z_mask = -1;
26
error_report("%s: requires more virtual address space "
27
"than the host can provide (0x%" PRIx64 ")",
28
- image_name, (uint64_t)guest_hiaddr - guest_base);
29
+ image_name, (uint64_t)guest_hiaddr + 1 - guest_base);
30
exit(EXIT_FAILURE);
31
}
32
#endif
33
@@ -XXX,XX +XXX,XX @@ static void pgb_have_guest_base(const char *image_name, abi_ulong guest_loaddr,
34
if (reserved_va) {
35
guest_loaddr = (guest_base >= mmap_min_addr ? 0
36
: mmap_min_addr - guest_base);
37
- guest_hiaddr = reserved_va;
38
+ guest_hiaddr = reserved_va - 1;
48
}
39
}
40
41
/* Reserve the address space for the binary, or reserved_va. */
42
test = g2h_untagged(guest_loaddr);
43
- addr = mmap(test, guest_hiaddr - guest_loaddr, PROT_NONE, flags, -1, 0);
44
+ addr = mmap(test, guest_hiaddr - guest_loaddr + 1, PROT_NONE, flags, -1, 0);
45
if (test != addr) {
46
pgb_fail_in_use(image_name);
47
}
48
qemu_log_mask(CPU_LOG_PAGE,
49
- "%s: base @ %p for " TARGET_ABI_FMT_ld " bytes\n",
50
- __func__, addr, guest_hiaddr - guest_loaddr);
51
+ "%s: base @ %p for %" PRIu64 " bytes\n",
52
+ __func__, addr, (uint64_t)guest_hiaddr - guest_loaddr + 1);
49
}
53
}
50
54
51
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
55
/**
52
const TCGOpDef *def;
56
@@ -XXX,XX +XXX,XX @@ static void pgb_static(const char *image_name, abi_ulong orig_loaddr,
53
TempOptInfo *di;
57
if (hiaddr != orig_hiaddr) {
54
TempOptInfo *si;
58
error_report("%s: requires virtual address space that the "
55
- uint64_t mask;
59
"host cannot provide (0x%" PRIx64 ")",
56
+ uint64_t z_mask;
60
- image_name, (uint64_t)orig_hiaddr);
57
TCGOpcode new_op;
61
+ image_name, (uint64_t)orig_hiaddr + 1);
58
62
exit(EXIT_FAILURE);
59
if (ts_are_copies(dst_ts, src_ts)) {
60
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
61
op->args[0] = dst;
62
op->args[1] = src;
63
64
- mask = si->mask;
65
+ z_mask = si->z_mask;
66
if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
67
/* High bits of the destination are now garbage. */
68
- mask |= ~0xffffffffull;
69
+ z_mask |= ~0xffffffffull;
70
}
63
}
71
- di->mask = mask;
64
72
+ di->z_mask = z_mask;
65
@@ -XXX,XX +XXX,XX @@ static void pgb_static(const char *image_name, abi_ulong orig_loaddr,
73
66
* arithmetic wraps around.
74
if (src_ts->type == dst_ts->type) {
67
*/
75
TempOptInfo *ni = ts_info(si->next_copy);
68
if (sizeof(uintptr_t) == 8 || loaddr >= 0x80000000u) {
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
- hiaddr = (uintptr_t) 4 << 30;
70
+ hiaddr = UINT32_MAX;
71
} else {
72
offset = -(HI_COMMPAGE & -align);
73
}
74
@@ -XXX,XX +XXX,XX @@ static void pgb_static(const char *image_name, abi_ulong orig_loaddr,
75
loaddr = MIN(loaddr, LO_COMMPAGE & -align);
77
}
76
}
78
77
79
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
78
- addr = pgb_find_hole(loaddr, hiaddr - loaddr, align, offset);
80
- uint64_t mask, partmask, affected, tmp;
79
+ addr = pgb_find_hole(loaddr, hiaddr - loaddr + 1, align, offset);
81
+ uint64_t z_mask, partmask, affected, tmp;
80
if (addr == -1) {
82
int nb_oargs, nb_iargs;
81
/*
83
TCGOpcode opc = op->opc;
82
* If HI_COMMPAGE, there *might* be a non-consecutive allocation
84
const TCGOpDef *def = &tcg_op_defs[opc];
83
@@ -XXX,XX +XXX,XX @@ static void pgb_reserved_va(const char *image_name, abi_ulong guest_loaddr,
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
84
if (guest_hiaddr > reserved_va) {
86
85
error_report("%s: requires more than reserved virtual "
87
/* Simplify using known-zero bits. Currently only ops with a single
86
"address space (0x%" PRIx64 " > 0x%lx)",
88
output argument is supported. */
87
- image_name, (uint64_t)guest_hiaddr, reserved_va);
89
- mask = -1;
88
+ image_name, (uint64_t)guest_hiaddr + 1, reserved_va);
90
+ z_mask = -1;
89
exit(EXIT_FAILURE);
91
affected = -1;
90
}
92
switch (opc) {
91
93
CASE_OP_32_64(ext8s):
92
@@ -XXX,XX +XXX,XX @@ static void load_elf_image(const char *image_name, int image_fd,
94
- if ((arg_info(op->args[1])->mask & 0x80) != 0) {
93
if (a < loaddr) {
95
+ if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
94
loaddr = a;
96
break;
97
}
95
}
98
QEMU_FALLTHROUGH;
96
- a = eppnt->p_vaddr + eppnt->p_memsz;
99
CASE_OP_32_64(ext8u):
97
+ a = eppnt->p_vaddr + eppnt->p_memsz - 1;
100
- mask = 0xff;
98
if (a > hiaddr) {
101
+ z_mask = 0xff;
99
hiaddr = a;
102
goto and_const;
103
CASE_OP_32_64(ext16s):
104
- if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
105
+ if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
106
break;
107
}
100
}
108
QEMU_FALLTHROUGH;
101
@@ -XXX,XX +XXX,XX @@ static void load_elf_image(const char *image_name, int image_fd,
109
CASE_OP_32_64(ext16u):
102
* In both cases, we will overwrite pages in this range with mappings
110
- mask = 0xffff;
103
* from the executable.
111
+ z_mask = 0xffff;
104
*/
112
goto and_const;
105
- load_addr = target_mmap(loaddr, hiaddr - loaddr, PROT_NONE,
113
case INDEX_op_ext32s_i64:
106
+ load_addr = target_mmap(loaddr, (size_t)hiaddr - loaddr + 1, PROT_NONE,
114
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
107
MAP_PRIVATE | MAP_ANON | MAP_NORESERVE |
115
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
108
(ehdr->e_type == ET_EXEC ? MAP_FIXED : 0),
116
break;
109
-1, 0);
117
}
110
diff --git a/linux-user/flatload.c b/linux-user/flatload.c
118
QEMU_FALLTHROUGH;
111
index XXXXXXX..XXXXXXX 100644
119
case INDEX_op_ext32u_i64:
112
--- a/linux-user/flatload.c
120
- mask = 0xffffffffU;
113
+++ b/linux-user/flatload.c
121
+ z_mask = 0xffffffffU;
114
@@ -XXX,XX +XXX,XX @@ static int load_flat_file(struct linux_binprm * bprm,
122
goto and_const;
115
* Allocate the address space.
123
116
*/
124
CASE_OP_32_64(and):
117
probe_guest_base(bprm->filename, 0,
125
- mask = arg_info(op->args[2])->mask;
118
- text_len + data_len + extra + indx_len);
126
+ z_mask = arg_info(op->args[2])->z_mask;
119
+ text_len + data_len + extra + indx_len - 1);
127
if (arg_is_const(op->args[2])) {
120
128
and_const:
121
/*
129
- affected = arg_info(op->args[1])->mask & ~mask;
122
* there are a couple of cases here, the separate code/data
130
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
131
}
132
- mask = arg_info(op->args[1])->mask & mask;
133
+ z_mask = arg_info(op->args[1])->z_mask & z_mask;
134
break;
135
136
case INDEX_op_ext_i32_i64:
137
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
138
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
139
break;
140
}
141
QEMU_FALLTHROUGH;
142
case INDEX_op_extu_i32_i64:
143
/* We do not compute affected as it is a size changing op. */
144
- mask = (uint32_t)arg_info(op->args[1])->mask;
145
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
146
break;
147
148
CASE_OP_32_64(andc):
149
/* Known-zeros does not imply known-ones. Therefore unless
150
op->args[2] is constant, we can't infer anything from it. */
151
if (arg_is_const(op->args[2])) {
152
- mask = ~arg_info(op->args[2])->mask;
153
+ z_mask = ~arg_info(op->args[2])->z_mask;
154
goto and_const;
155
}
156
/* But we certainly know nothing outside args[1] may be set. */
157
- mask = arg_info(op->args[1])->mask;
158
+ z_mask = arg_info(op->args[1])->z_mask;
159
break;
160
161
case INDEX_op_sar_i32:
162
if (arg_is_const(op->args[2])) {
163
tmp = arg_info(op->args[2])->val & 31;
164
- mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
165
+ z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
166
}
167
break;
168
case INDEX_op_sar_i64:
169
if (arg_is_const(op->args[2])) {
170
tmp = arg_info(op->args[2])->val & 63;
171
- mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
172
+ z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
173
}
174
break;
175
176
case INDEX_op_shr_i32:
177
if (arg_is_const(op->args[2])) {
178
tmp = arg_info(op->args[2])->val & 31;
179
- mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
180
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
181
}
182
break;
183
case INDEX_op_shr_i64:
184
if (arg_is_const(op->args[2])) {
185
tmp = arg_info(op->args[2])->val & 63;
186
- mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
187
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
188
}
189
break;
190
191
case INDEX_op_extrl_i64_i32:
192
- mask = (uint32_t)arg_info(op->args[1])->mask;
193
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
194
break;
195
case INDEX_op_extrh_i64_i32:
196
- mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
197
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
198
break;
199
200
CASE_OP_32_64(shl):
201
if (arg_is_const(op->args[2])) {
202
tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
203
- mask = arg_info(op->args[1])->mask << tmp;
204
+ z_mask = arg_info(op->args[1])->z_mask << tmp;
205
}
206
break;
207
208
CASE_OP_32_64(neg):
209
/* Set to 1 all bits to the left of the rightmost. */
210
- mask = -(arg_info(op->args[1])->mask
211
- & -arg_info(op->args[1])->mask);
212
+ z_mask = -(arg_info(op->args[1])->z_mask
213
+ & -arg_info(op->args[1])->z_mask);
214
break;
215
216
CASE_OP_32_64(deposit):
217
- mask = deposit64(arg_info(op->args[1])->mask,
218
- op->args[3], op->args[4],
219
- arg_info(op->args[2])->mask);
220
+ z_mask = deposit64(arg_info(op->args[1])->z_mask,
221
+ op->args[3], op->args[4],
222
+ arg_info(op->args[2])->z_mask);
223
break;
224
225
CASE_OP_32_64(extract):
226
- mask = extract64(arg_info(op->args[1])->mask,
227
- op->args[2], op->args[3]);
228
+ z_mask = extract64(arg_info(op->args[1])->z_mask,
229
+ op->args[2], op->args[3]);
230
if (op->args[2] == 0) {
231
- affected = arg_info(op->args[1])->mask & ~mask;
232
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
233
}
234
break;
235
CASE_OP_32_64(sextract):
236
- mask = sextract64(arg_info(op->args[1])->mask,
237
- op->args[2], op->args[3]);
238
- if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
239
- affected = arg_info(op->args[1])->mask & ~mask;
240
+ z_mask = sextract64(arg_info(op->args[1])->z_mask,
241
+ op->args[2], op->args[3]);
242
+ if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
243
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
244
}
245
break;
246
247
CASE_OP_32_64(or):
248
CASE_OP_32_64(xor):
249
- mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
250
+ z_mask = arg_info(op->args[1])->z_mask
251
+ | arg_info(op->args[2])->z_mask;
252
break;
253
254
case INDEX_op_clz_i32:
255
case INDEX_op_ctz_i32:
256
- mask = arg_info(op->args[2])->mask | 31;
257
+ z_mask = arg_info(op->args[2])->z_mask | 31;
258
break;
259
260
case INDEX_op_clz_i64:
261
case INDEX_op_ctz_i64:
262
- mask = arg_info(op->args[2])->mask | 63;
263
+ z_mask = arg_info(op->args[2])->z_mask | 63;
264
break;
265
266
case INDEX_op_ctpop_i32:
267
- mask = 32 | 31;
268
+ z_mask = 32 | 31;
269
break;
270
case INDEX_op_ctpop_i64:
271
- mask = 64 | 63;
272
+ z_mask = 64 | 63;
273
break;
274
275
CASE_OP_32_64(setcond):
276
case INDEX_op_setcond2_i32:
277
- mask = 1;
278
+ z_mask = 1;
279
break;
280
281
CASE_OP_32_64(movcond):
282
- mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
283
+ z_mask = arg_info(op->args[3])->z_mask
284
+ | arg_info(op->args[4])->z_mask;
285
break;
286
287
CASE_OP_32_64(ld8u):
288
- mask = 0xff;
289
+ z_mask = 0xff;
290
break;
291
CASE_OP_32_64(ld16u):
292
- mask = 0xffff;
293
+ z_mask = 0xffff;
294
break;
295
case INDEX_op_ld32u_i64:
296
- mask = 0xffffffffu;
297
+ z_mask = 0xffffffffu;
298
break;
299
300
CASE_OP_32_64(qemu_ld):
301
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
302
MemOpIdx oi = op->args[nb_oargs + nb_iargs];
303
MemOp mop = get_memop(oi);
304
if (!(mop & MO_SIGN)) {
305
- mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
306
+ z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
307
}
308
}
309
break;
310
311
CASE_OP_32_64(bswap16):
312
- mask = arg_info(op->args[1])->mask;
313
- if (mask <= 0xffff) {
314
+ z_mask = arg_info(op->args[1])->z_mask;
315
+ if (z_mask <= 0xffff) {
316
op->args[2] |= TCG_BSWAP_IZ;
317
}
318
- mask = bswap16(mask);
319
+ z_mask = bswap16(z_mask);
320
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
321
case TCG_BSWAP_OZ:
322
break;
323
case TCG_BSWAP_OS:
324
- mask = (int16_t)mask;
325
+ z_mask = (int16_t)z_mask;
326
break;
327
default: /* undefined high bits */
328
- mask |= MAKE_64BIT_MASK(16, 48);
329
+ z_mask |= MAKE_64BIT_MASK(16, 48);
330
break;
331
}
332
break;
333
334
case INDEX_op_bswap32_i64:
335
- mask = arg_info(op->args[1])->mask;
336
- if (mask <= 0xffffffffu) {
337
+ z_mask = arg_info(op->args[1])->z_mask;
338
+ if (z_mask <= 0xffffffffu) {
339
op->args[2] |= TCG_BSWAP_IZ;
340
}
341
- mask = bswap32(mask);
342
+ z_mask = bswap32(z_mask);
343
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
344
case TCG_BSWAP_OZ:
345
break;
346
case TCG_BSWAP_OS:
347
- mask = (int32_t)mask;
348
+ z_mask = (int32_t)z_mask;
349
break;
350
default: /* undefined high bits */
351
- mask |= MAKE_64BIT_MASK(32, 32);
352
+ z_mask |= MAKE_64BIT_MASK(32, 32);
353
break;
354
}
355
break;
356
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
357
/* 32-bit ops generate 32-bit results. For the result is zero test
358
below, we can ignore high bits, but for further optimizations we
359
need to record that the high bits contain garbage. */
360
- partmask = mask;
361
+ partmask = z_mask;
362
if (!(def->flags & TCG_OPF_64BIT)) {
363
- mask |= ~(tcg_target_ulong)0xffffffffu;
364
+ z_mask |= ~(tcg_target_ulong)0xffffffffu;
365
partmask &= 0xffffffffu;
366
affected &= 0xffffffffu;
367
}
368
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
369
vs the high word of the input. */
370
do_setcond_high:
371
reset_temp(op->args[0]);
372
- arg_info(op->args[0])->mask = 1;
373
+ arg_info(op->args[0])->z_mask = 1;
374
op->opc = INDEX_op_setcond_i32;
375
op->args[1] = op->args[2];
376
op->args[2] = op->args[4];
377
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
378
}
379
do_setcond_low:
380
reset_temp(op->args[0]);
381
- arg_info(op->args[0])->mask = 1;
382
+ arg_info(op->args[0])->z_mask = 1;
383
op->opc = INDEX_op_setcond_i32;
384
op->args[2] = op->args[3];
385
op->args[3] = op->args[5];
386
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
387
/* Default case: we know nothing about operation (or were unable
388
to compute the operation result) so no propagation is done.
389
We trash everything if the operation is the end of a basic
390
- block, otherwise we only trash the output args. "mask" is
391
+ block, otherwise we only trash the output args. "z_mask" is
392
the non-zero bits mask for the first output arg. */
393
if (def->flags & TCG_OPF_BB_END) {
394
memset(&temps_used, 0, sizeof(temps_used));
395
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
396
/* Save the corresponding known-zero bits mask for the
397
first output argument (only one supported so far). */
398
if (i == 0) {
399
- arg_info(op->args[i])->mask = mask;
400
+ arg_info(op->args[i])->z_mask = z_mask;
401
}
402
}
403
}
404
--
123
--
405
2.25.1
124
2.34.1
406
407
Deleted patch
1
Break the final cleanup clause out of the main switch
2
statement. When fully folding an opcode to mov/movi,
3
use "continue" to process the next opcode, else break
4
to fall into the final cleanup.
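
[Illustrative sketch only, not the QEMU code: the control-flow shape this
patch moves to -- fully handled cases "continue" to the next op, everything
else "break"s out of the switch into one shared tail, so the do_default
label can go away.]

#include <stdio.h>

int main(void)
{
    int ops[] = { 1, 2, 7 };
    int n = sizeof(ops) / sizeof(ops[0]);

    for (int i = 0; i < n; i++) {
        switch (ops[i]) {
        case 1:                             /* fully folded away */
            printf("op %d: folded, skip cleanup\n", ops[i]);
            continue;                       /* straight to the next op */
        case 2:                             /* partially simplified */
            printf("op %d: simplified\n", ops[i]);
            break;                          /* fall into the shared tail */
        default:                            /* nothing known */
            break;
        }
        /* Shared tail, formerly reached via "goto do_default". */
        printf("op %d: generic cleanup\n", ops[i]);
    }
    return 0;
}
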
5
1
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
tcg/optimize.c | 190 ++++++++++++++++++++++++-------------------------
12
1 file changed, 94 insertions(+), 96 deletions(-)
13
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
17
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
19
switch (opc) {
20
CASE_OP_32_64_VEC(mov):
21
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
22
- break;
23
+ continue;
24
25
case INDEX_op_dup_vec:
26
if (arg_is_const(op->args[1])) {
27
tmp = arg_info(op->args[1])->val;
28
tmp = dup_const(TCGOP_VECE(op), tmp);
29
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
30
- break;
31
+ continue;
32
}
33
- goto do_default;
34
+ break;
35
36
case INDEX_op_dup2_vec:
37
assert(TCG_TARGET_REG_BITS == 32);
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
39
tcg_opt_gen_movi(s, &ctx, op, op->args[0],
40
deposit64(arg_info(op->args[1])->val, 32, 32,
41
arg_info(op->args[2])->val));
42
- break;
43
+ continue;
44
} else if (args_are_copies(op->args[1], op->args[2])) {
45
op->opc = INDEX_op_dup_vec;
46
TCGOP_VECE(op) = MO_32;
47
nb_iargs = 1;
48
}
49
- goto do_default;
50
+ break;
51
52
CASE_OP_32_64(not):
53
CASE_OP_32_64(neg):
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
55
if (arg_is_const(op->args[1])) {
56
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
57
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
58
- break;
59
+ continue;
60
}
61
- goto do_default;
62
+ break;
63
64
CASE_OP_32_64(bswap16):
65
CASE_OP_32_64(bswap32):
66
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
67
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
68
op->args[2]);
69
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
70
- break;
71
+ continue;
72
}
73
- goto do_default;
74
+ break;
75
76
CASE_OP_32_64(add):
77
CASE_OP_32_64(sub):
78
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
79
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
80
arg_info(op->args[2])->val);
81
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
82
- break;
83
+ continue;
84
}
85
- goto do_default;
86
+ break;
87
88
CASE_OP_32_64(clz):
89
CASE_OP_32_64(ctz):
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
91
} else {
92
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
93
}
94
- break;
95
+ continue;
96
}
97
- goto do_default;
98
+ break;
99
100
CASE_OP_32_64(deposit):
101
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
102
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
103
op->args[3], op->args[4],
104
arg_info(op->args[2])->val);
105
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
106
- break;
107
+ continue;
108
}
109
- goto do_default;
110
+ break;
111
112
CASE_OP_32_64(extract):
113
if (arg_is_const(op->args[1])) {
114
tmp = extract64(arg_info(op->args[1])->val,
115
op->args[2], op->args[3]);
116
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
117
- break;
118
+ continue;
119
}
120
- goto do_default;
121
+ break;
122
123
CASE_OP_32_64(sextract):
124
if (arg_is_const(op->args[1])) {
125
tmp = sextract64(arg_info(op->args[1])->val,
126
op->args[2], op->args[3]);
127
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
128
- break;
129
+ continue;
130
}
131
- goto do_default;
132
+ break;
133
134
CASE_OP_32_64(extract2):
135
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
136
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
137
((uint32_t)v2 << (32 - shr)));
138
}
139
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
140
- break;
141
+ continue;
142
}
143
- goto do_default;
144
+ break;
145
146
CASE_OP_32_64(setcond):
147
tmp = do_constant_folding_cond(opc, op->args[1],
148
op->args[2], op->args[3]);
149
if (tmp != 2) {
150
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
151
- break;
152
+ continue;
153
}
154
- goto do_default;
155
+ break;
156
157
CASE_OP_32_64(brcond):
158
tmp = do_constant_folding_cond(opc, op->args[0],
159
op->args[1], op->args[2]);
160
- if (tmp != 2) {
161
- if (tmp) {
162
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
163
- op->opc = INDEX_op_br;
164
- op->args[0] = op->args[3];
165
- } else {
166
- tcg_op_remove(s, op);
167
- }
168
+ switch (tmp) {
169
+ case 0:
170
+ tcg_op_remove(s, op);
171
+ continue;
172
+ case 1:
173
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
174
+ op->opc = opc = INDEX_op_br;
175
+ op->args[0] = op->args[3];
176
break;
177
}
178
- goto do_default;
179
+ break;
180
181
CASE_OP_32_64(movcond):
182
tmp = do_constant_folding_cond(opc, op->args[1],
183
op->args[2], op->args[5]);
184
if (tmp != 2) {
185
tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
186
- break;
187
+ continue;
188
}
189
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
190
uint64_t tv = arg_info(op->args[3])->val;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
if (fv == 1 && tv == 0) {
193
cond = tcg_invert_cond(cond);
194
} else if (!(tv == 1 && fv == 0)) {
195
- goto do_default;
196
+ break;
197
}
198
op->args[3] = cond;
199
op->opc = opc = (opc == INDEX_op_movcond_i32
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
: INDEX_op_setcond_i64);
202
nb_iargs = 2;
203
}
204
- goto do_default;
205
+ break;
206
207
case INDEX_op_add2_i32:
208
case INDEX_op_sub2_i32:
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
rh = op->args[1];
211
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
212
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
213
- break;
214
+ continue;
215
}
216
- goto do_default;
217
+ break;
218
219
case INDEX_op_mulu2_i32:
220
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
221
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
222
rh = op->args[1];
223
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
224
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
225
- break;
226
+ continue;
227
}
228
- goto do_default;
229
+ break;
230
231
case INDEX_op_brcond2_i32:
232
tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
233
op->args[4]);
234
- if (tmp != 2) {
235
- if (tmp) {
236
- do_brcond_true:
237
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
238
- op->opc = INDEX_op_br;
239
- op->args[0] = op->args[5];
240
- } else {
241
+ if (tmp == 0) {
242
do_brcond_false:
243
- tcg_op_remove(s, op);
244
- }
245
- } else if ((op->args[4] == TCG_COND_LT
246
- || op->args[4] == TCG_COND_GE)
247
- && arg_is_const(op->args[2])
248
- && arg_info(op->args[2])->val == 0
249
- && arg_is_const(op->args[3])
250
- && arg_info(op->args[3])->val == 0) {
251
+ tcg_op_remove(s, op);
252
+ continue;
253
+ }
254
+ if (tmp == 1) {
255
+ do_brcond_true:
256
+ op->opc = opc = INDEX_op_br;
257
+ op->args[0] = op->args[5];
258
+ break;
259
+ }
260
+ if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
261
+ && arg_is_const(op->args[2])
262
+ && arg_info(op->args[2])->val == 0
263
+ && arg_is_const(op->args[3])
264
+ && arg_info(op->args[3])->val == 0) {
265
/* Simplify LT/GE comparisons vs zero to a single compare
266
vs the high word of the input. */
267
do_brcond_high:
268
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
269
- op->opc = INDEX_op_brcond_i32;
270
+ op->opc = opc = INDEX_op_brcond_i32;
271
op->args[0] = op->args[1];
272
op->args[1] = op->args[3];
273
op->args[2] = op->args[4];
274
op->args[3] = op->args[5];
275
- } else if (op->args[4] == TCG_COND_EQ) {
276
+ break;
277
+ }
278
+ if (op->args[4] == TCG_COND_EQ) {
279
/* Simplify EQ comparisons where one of the pairs
280
can be simplified. */
281
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
282
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
283
if (tmp == 0) {
284
goto do_brcond_false;
285
} else if (tmp != 1) {
286
- goto do_default;
287
+ break;
288
}
289
do_brcond_low:
290
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
291
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
292
op->args[1] = op->args[2];
293
op->args[2] = op->args[4];
294
op->args[3] = op->args[5];
295
- } else if (op->args[4] == TCG_COND_NE) {
296
+ break;
297
+ }
298
+ if (op->args[4] == TCG_COND_NE) {
299
/* Simplify NE comparisons where one of the pairs
300
can be simplified. */
301
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
302
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
303
} else if (tmp == 1) {
304
goto do_brcond_true;
305
}
306
- goto do_default;
307
- } else {
308
- goto do_default;
309
}
310
break;
311
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
if (tmp != 2) {
314
do_setcond_const:
315
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
316
- } else if ((op->args[5] == TCG_COND_LT
317
- || op->args[5] == TCG_COND_GE)
318
- && arg_is_const(op->args[3])
319
- && arg_info(op->args[3])->val == 0
320
- && arg_is_const(op->args[4])
321
- && arg_info(op->args[4])->val == 0) {
322
+ continue;
323
+ }
324
+ if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
325
+ && arg_is_const(op->args[3])
326
+ && arg_info(op->args[3])->val == 0
327
+ && arg_is_const(op->args[4])
328
+ && arg_info(op->args[4])->val == 0) {
329
/* Simplify LT/GE comparisons vs zero to a single compare
330
vs the high word of the input. */
331
do_setcond_high:
332
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
333
op->args[1] = op->args[2];
334
op->args[2] = op->args[4];
335
op->args[3] = op->args[5];
336
- } else if (op->args[5] == TCG_COND_EQ) {
337
+ break;
338
+ }
339
+ if (op->args[5] == TCG_COND_EQ) {
340
/* Simplify EQ comparisons where one of the pairs
341
can be simplified. */
342
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
343
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
344
if (tmp == 0) {
345
goto do_setcond_high;
346
} else if (tmp != 1) {
347
- goto do_default;
348
+ break;
349
}
350
do_setcond_low:
351
reset_temp(op->args[0]);
352
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
353
op->opc = INDEX_op_setcond_i32;
354
op->args[2] = op->args[3];
355
op->args[3] = op->args[5];
356
- } else if (op->args[5] == TCG_COND_NE) {
357
+ break;
358
+ }
359
+ if (op->args[5] == TCG_COND_NE) {
360
/* Simplify NE comparisons where one of the pairs
361
can be simplified. */
362
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
363
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
364
} else if (tmp == 1) {
365
goto do_setcond_const;
366
}
367
- goto do_default;
368
- } else {
369
- goto do_default;
370
}
371
break;
372
373
- case INDEX_op_call:
374
- if (!(tcg_call_flags(op)
375
+ default:
376
+ break;
377
+ }
378
+
379
+ /* Some of the folding above can change opc. */
380
+ opc = op->opc;
381
+ def = &tcg_op_defs[opc];
382
+ if (def->flags & TCG_OPF_BB_END) {
383
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
384
+ } else {
385
+ if (opc == INDEX_op_call &&
386
+ !(tcg_call_flags(op)
387
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
388
for (i = 0; i < nb_globals; i++) {
389
if (test_bit(i, ctx.temps_used.l)) {
390
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
391
}
392
}
393
}
394
- goto do_reset_output;
395
396
- default:
397
- do_default:
398
- /* Default case: we know nothing about operation (or were unable
399
- to compute the operation result) so no propagation is done.
400
- We trash everything if the operation is the end of a basic
401
- block, otherwise we only trash the output args. "z_mask" is
402
- the non-zero bits mask for the first output arg. */
403
- if (def->flags & TCG_OPF_BB_END) {
404
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
405
- } else {
406
- do_reset_output:
407
- for (i = 0; i < nb_oargs; i++) {
408
- reset_temp(op->args[i]);
409
- /* Save the corresponding known-zero bits mask for the
410
- first output argument (only one supported so far). */
411
- if (i == 0) {
412
- arg_info(op->args[i])->z_mask = z_mask;
413
- }
414
+ for (i = 0; i < nb_oargs; i++) {
415
+ reset_temp(op->args[i]);
416
+ /* Save the corresponding known-zero bits mask for the
417
+ first output argument (only one supported so far). */
418
+ if (i == 0) {
419
+ arg_info(op->args[i])->z_mask = z_mask;
420
}
421
}
422
- break;
423
}
424
425
/* Eliminate duplicate and redundant fence instructions. */
426
--
427
2.25.1
428
429
Deleted patch
1
This will expose the variable to subroutines that
2
will be broken out of tcg_optimize.
3
1
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/optimize.c | 11 ++++++-----
10
1 file changed, 6 insertions(+), 5 deletions(-)
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
17
18
typedef struct OptContext {
19
TCGContext *tcg;
20
+ TCGOp *prev_mb;
21
TCGTempSet temps_used;
22
} OptContext;
23
24
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
25
void tcg_optimize(TCGContext *s)
26
{
27
int nb_temps, nb_globals, i;
28
- TCGOp *op, *op_next, *prev_mb = NULL;
29
+ TCGOp *op, *op_next;
30
OptContext ctx = { .tcg = s };
31
32
/* Array VALS has an element for each temp.
33
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
34
}
35
36
/* Eliminate duplicate and redundant fence instructions. */
37
- if (prev_mb) {
38
+ if (ctx.prev_mb) {
39
switch (opc) {
40
case INDEX_op_mb:
41
/* Merge two barriers of the same type into one,
42
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
43
* barrier. This is stricter than specified but for
44
* the purposes of TCG is better than not optimizing.
45
*/
46
- prev_mb->args[0] |= op->args[0];
47
+ ctx.prev_mb->args[0] |= op->args[0];
48
tcg_op_remove(s, op);
49
break;
50
51
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
52
case INDEX_op_qemu_st_i64:
53
case INDEX_op_call:
54
/* Opcodes that touch guest memory stop the optimization. */
55
- prev_mb = NULL;
56
+ ctx.prev_mb = NULL;
57
break;
58
}
59
} else if (opc == INDEX_op_mb) {
60
- prev_mb = op;
61
+ ctx.prev_mb = op;
62
}
63
}
64
}
65
--
66
2.25.1
67
68
Deleted patch
1
There was no real reason for calls to have separate code here.
2
Unify init for calls vs non-calls using the call path, which
3
handles TCG_CALL_DUMMY_ARG.
4
1
5
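For illustration, a hedged sketch of why a single init loop can serve both calls and ordinary ops: placeholder arguments simply carry no temp and are skipped, which is exactly what TCG_CALL_DUMMY_ARG amounts to. The helper name and types here are invented; only the skip-on-NULL shape mirrors the patch.

    #include <stddef.h>

    /* One init loop for both calls and non-calls: slots without a
     * real argument are left untouched. */
    static void init_arguments_sketch(int *seen, void *const *args, int nb_args)
    {
        for (int i = 0; i < nb_args; i++) {
            if (args[i] != NULL) {   /* dummy slots have no temp */
                seen[i] = 1;
            }
        }
    }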
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
10
tcg/optimize.c | 25 +++++++++++--------------
11
1 file changed, 11 insertions(+), 14 deletions(-)
12
13
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/optimize.c
16
+++ b/tcg/optimize.c
17
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
18
}
19
}
20
21
-static void init_arg_info(OptContext *ctx, TCGArg arg)
22
-{
23
- init_ts_info(ctx, arg_temp(arg));
24
-}
25
-
26
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
27
{
28
TCGTemp *i, *g, *l;
29
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
30
return false;
31
}
32
33
+static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
34
+{
35
+ for (int i = 0; i < nb_args; i++) {
36
+ TCGTemp *ts = arg_temp(op->args[i]);
37
+ if (ts) {
38
+ init_ts_info(ctx, ts);
39
+ }
40
+ }
41
+}
42
+
43
/* Propagate constants and copies, fold constant expressions. */
44
void tcg_optimize(TCGContext *s)
45
{
46
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
47
if (opc == INDEX_op_call) {
48
nb_oargs = TCGOP_CALLO(op);
49
nb_iargs = TCGOP_CALLI(op);
50
- for (i = 0; i < nb_oargs + nb_iargs; i++) {
51
- TCGTemp *ts = arg_temp(op->args[i]);
52
- if (ts) {
53
- init_ts_info(&ctx, ts);
54
- }
55
- }
56
} else {
57
nb_oargs = def->nb_oargs;
58
nb_iargs = def->nb_iargs;
59
- for (i = 0; i < nb_oargs + nb_iargs; i++) {
60
- init_arg_info(&ctx, op->args[i]);
61
- }
62
}
63
+ init_arguments(&ctx, op, nb_oargs + nb_iargs);
64
65
/* Do copy propagation */
66
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
67
--
68
2.25.1
69
70
Deleted patch
1
Continue splitting tcg_optimize.
2
1
3
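As a rough sketch of the copy propagation being factored out here (invented names, not the TCG data structures): each input operand is rewritten to the canonical representative of its value, which is the identity mapping when no better copy is known.

    /* Simplified copy propagation over one op's input operands. */
    static void copy_propagate_sketch(int *args, int nb_oargs, int nb_iargs,
                                      const int *canonical)
    {
        for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            /* canonical[t] == t when the temp has no known copy */
            args[i] = canonical[args[i]];
        }
    }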
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 22 ++++++++++++++--------
9
1 file changed, 14 insertions(+), 8 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
16
}
17
}
18
19
+static void copy_propagate(OptContext *ctx, TCGOp *op,
20
+ int nb_oargs, int nb_iargs)
21
+{
22
+ TCGContext *s = ctx->tcg;
23
+
24
+ for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
25
+ TCGTemp *ts = arg_temp(op->args[i]);
26
+ if (ts && ts_is_copy(ts)) {
27
+ op->args[i] = temp_arg(find_better_copy(s, ts));
28
+ }
29
+ }
30
+}
31
+
32
/* Propagate constants and copies, fold constant expressions. */
33
void tcg_optimize(TCGContext *s)
34
{
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
36
nb_iargs = def->nb_iargs;
37
}
38
init_arguments(&ctx, op, nb_oargs + nb_iargs);
39
-
40
- /* Do copy propagation */
41
- for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
42
- TCGTemp *ts = arg_temp(op->args[i]);
43
- if (ts && ts_is_copy(ts)) {
44
- op->args[i] = temp_arg(find_better_copy(s, ts));
45
- }
46
- }
47
+ copy_propagate(&ctx, op, nb_oargs, nb_iargs);
48
49
/* For commutative operations make constant second argument */
50
switch (opc) {
51
--
52
2.25.1
53
54
Deleted patch
1
Calls are special in that they have a variable number
2
of arguments, and need to be able to clobber globals.
3
1
4
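A standalone sketch of the rule the new fold_call enforces; NB_GLOBALS and the function name are invented for the example. Any constant-tracking knowledge about globals has to be dropped across a call unless the call is flagged as neither reading nor writing globals.

    #include <stdbool.h>

    enum { NB_GLOBALS = 8 };

    static void cross_call_sketch(bool *known_const, bool call_touches_globals)
    {
        if (call_touches_globals) {
            for (int i = 0; i < NB_GLOBALS; i++) {
                known_const[i] = false;   /* forget everything about globals */
            }
        }
    }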
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 63 ++++++++++++++++++++++++++++++++------------------
9
1 file changed, 41 insertions(+), 22 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
16
}
17
}
18
19
+static bool fold_call(OptContext *ctx, TCGOp *op)
20
+{
21
+ TCGContext *s = ctx->tcg;
22
+ int nb_oargs = TCGOP_CALLO(op);
23
+ int nb_iargs = TCGOP_CALLI(op);
24
+ int flags, i;
25
+
26
+ init_arguments(ctx, op, nb_oargs + nb_iargs);
27
+ copy_propagate(ctx, op, nb_oargs, nb_iargs);
28
+
29
+ /* If the function reads or writes globals, reset temp data. */
30
+ flags = tcg_call_flags(op);
31
+ if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
32
+ int nb_globals = s->nb_globals;
33
+
34
+ for (i = 0; i < nb_globals; i++) {
35
+ if (test_bit(i, ctx->temps_used.l)) {
36
+ reset_ts(&ctx->tcg->temps[i]);
37
+ }
38
+ }
39
+ }
40
+
41
+ /* Reset temp data for outputs. */
42
+ for (i = 0; i < nb_oargs; i++) {
43
+ reset_temp(op->args[i]);
44
+ }
45
+
46
+ /* Stop optimizing MB across calls. */
47
+ ctx->prev_mb = NULL;
48
+ return true;
49
+}
50
+
51
/* Propagate constants and copies, fold constant expressions. */
52
void tcg_optimize(TCGContext *s)
53
{
54
- int nb_temps, nb_globals, i;
55
+ int nb_temps, i;
56
TCGOp *op, *op_next;
57
OptContext ctx = { .tcg = s };
58
59
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
60
available through the doubly linked circular list. */
61
62
nb_temps = s->nb_temps;
63
- nb_globals = s->nb_globals;
64
-
65
for (i = 0; i < nb_temps; ++i) {
66
s->temps[i].state_ptr = NULL;
67
}
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
uint64_t z_mask, partmask, affected, tmp;
70
int nb_oargs, nb_iargs;
71
TCGOpcode opc = op->opc;
72
- const TCGOpDef *def = &tcg_op_defs[opc];
73
+ const TCGOpDef *def;
74
75
- /* Count the arguments, and initialize the temps that are
76
- going to be used */
77
+ /* Calls are special. */
78
if (opc == INDEX_op_call) {
79
- nb_oargs = TCGOP_CALLO(op);
80
- nb_iargs = TCGOP_CALLI(op);
81
- } else {
82
- nb_oargs = def->nb_oargs;
83
- nb_iargs = def->nb_iargs;
84
+ fold_call(&ctx, op);
85
+ continue;
86
}
87
+
88
+ def = &tcg_op_defs[opc];
89
+ nb_oargs = def->nb_oargs;
90
+ nb_iargs = def->nb_iargs;
91
init_arguments(&ctx, op, nb_oargs + nb_iargs);
92
copy_propagate(&ctx, op, nb_oargs, nb_iargs);
93
94
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
95
if (def->flags & TCG_OPF_BB_END) {
96
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
97
} else {
98
- if (opc == INDEX_op_call &&
99
- !(tcg_call_flags(op)
100
- & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
101
- for (i = 0; i < nb_globals; i++) {
102
- if (test_bit(i, ctx.temps_used.l)) {
103
- reset_ts(&s->temps[i]);
104
- }
105
- }
106
- }
107
-
108
for (i = 0; i < nb_oargs; i++) {
109
reset_temp(op->args[i]);
110
/* Save the corresponding known-zero bits mask for the
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
case INDEX_op_qemu_st_i32:
113
case INDEX_op_qemu_st8_i32:
114
case INDEX_op_qemu_st_i64:
115
- case INDEX_op_call:
116
/* Opcodes that touch guest memory stop the optimization. */
117
ctx.prev_mb = NULL;
118
break;
119
--
120
2.25.1
121
122
Deleted patch
1
Return -1 instead of 2 for failure, so that we can
2
use comparisons against 0 for all cases.
3
1
4
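A minimal sketch of the new return convention, with invented names; the point is that all three outcomes become simple sign tests against 0.

    /*   i <  0  -> condition could not be folded
     *   i == 0  -> condition is known false
     *   i >  0  -> condition is known true
     */
    static int eval_cond_sketch(int known, int value)
    {
        return known ? (value != 0) : -1;
    }

    /* Caller:
     *     int i = eval_cond_sketch(known, value);
     *     if (i >= 0) {
     *         ... use the constant result ...
     *     }
     */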
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 145 +++++++++++++++++++++++++------------------------
9
1 file changed, 74 insertions(+), 71 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
16
}
17
}
18
19
-/* Return 2 if the condition can't be simplified, and the result
20
- of the condition (0 or 1) if it can */
21
-static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
22
- TCGArg y, TCGCond c)
23
+/*
24
+ * Return -1 if the condition can't be simplified,
25
+ * and the result of the condition (0 or 1) if it can.
26
+ */
27
+static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
28
+ TCGArg y, TCGCond c)
29
{
30
uint64_t xv = arg_info(x)->val;
31
uint64_t yv = arg_info(y)->val;
32
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
33
case TCG_COND_GEU:
34
return 1;
35
default:
36
- return 2;
37
+ return -1;
38
}
39
}
40
- return 2;
41
+ return -1;
42
}
43
44
-/* Return 2 if the condition can't be simplified, and the result
45
- of the condition (0 or 1) if it can */
46
-static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
47
+/*
48
+ * Return -1 if the condition can't be simplified,
49
+ * and the result of the condition (0 or 1) if it can.
50
+ */
51
+static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
52
{
53
TCGArg al = p1[0], ah = p1[1];
54
TCGArg bl = p2[0], bh = p2[1];
55
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
56
if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
57
return do_constant_folding_cond_eq(c);
58
}
59
- return 2;
60
+ return -1;
61
}
62
63
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
65
break;
66
67
CASE_OP_32_64(setcond):
68
- tmp = do_constant_folding_cond(opc, op->args[1],
69
- op->args[2], op->args[3]);
70
- if (tmp != 2) {
71
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
72
+ i = do_constant_folding_cond(opc, op->args[1],
73
+ op->args[2], op->args[3]);
74
+ if (i >= 0) {
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], i);
76
continue;
77
}
78
break;
79
80
CASE_OP_32_64(brcond):
81
- tmp = do_constant_folding_cond(opc, op->args[0],
82
- op->args[1], op->args[2]);
83
- switch (tmp) {
84
- case 0:
85
+ i = do_constant_folding_cond(opc, op->args[0],
86
+ op->args[1], op->args[2]);
87
+ if (i == 0) {
88
tcg_op_remove(s, op);
89
continue;
90
- case 1:
91
+ } else if (i > 0) {
92
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
93
op->opc = opc = INDEX_op_br;
94
op->args[0] = op->args[3];
95
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
96
break;
97
98
CASE_OP_32_64(movcond):
99
- tmp = do_constant_folding_cond(opc, op->args[1],
100
- op->args[2], op->args[5]);
101
- if (tmp != 2) {
102
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
103
+ i = do_constant_folding_cond(opc, op->args[1],
104
+ op->args[2], op->args[5]);
105
+ if (i >= 0) {
106
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
107
continue;
108
}
109
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
break;
112
113
case INDEX_op_brcond2_i32:
114
- tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
115
- op->args[4]);
116
- if (tmp == 0) {
117
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2],
118
+ op->args[4]);
119
+ if (i == 0) {
120
do_brcond_false:
121
tcg_op_remove(s, op);
122
continue;
123
}
124
- if (tmp == 1) {
125
+ if (i > 0) {
126
do_brcond_true:
127
op->opc = opc = INDEX_op_br;
128
op->args[0] = op->args[5];
129
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
130
if (op->args[4] == TCG_COND_EQ) {
131
/* Simplify EQ comparisons where one of the pairs
132
can be simplified. */
133
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
134
- op->args[0], op->args[2],
135
- TCG_COND_EQ);
136
- if (tmp == 0) {
137
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
138
+ op->args[0], op->args[2],
139
+ TCG_COND_EQ);
140
+ if (i == 0) {
141
goto do_brcond_false;
142
- } else if (tmp == 1) {
143
+ } else if (i > 0) {
144
goto do_brcond_high;
145
}
146
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
147
- op->args[1], op->args[3],
148
- TCG_COND_EQ);
149
- if (tmp == 0) {
150
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
151
+ op->args[1], op->args[3],
152
+ TCG_COND_EQ);
153
+ if (i == 0) {
154
goto do_brcond_false;
155
- } else if (tmp != 1) {
156
+ } else if (i < 0) {
157
break;
158
}
159
do_brcond_low:
160
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
161
if (op->args[4] == TCG_COND_NE) {
162
/* Simplify NE comparisons where one of the pairs
163
can be simplified. */
164
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
165
- op->args[0], op->args[2],
166
- TCG_COND_NE);
167
- if (tmp == 0) {
168
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
169
+ op->args[0], op->args[2],
170
+ TCG_COND_NE);
171
+ if (i == 0) {
172
goto do_brcond_high;
173
- } else if (tmp == 1) {
174
+ } else if (i > 0) {
175
goto do_brcond_true;
176
}
177
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
178
- op->args[1], op->args[3],
179
- TCG_COND_NE);
180
- if (tmp == 0) {
181
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
182
+ op->args[1], op->args[3],
183
+ TCG_COND_NE);
184
+ if (i == 0) {
185
goto do_brcond_low;
186
- } else if (tmp == 1) {
187
+ } else if (i > 0) {
188
goto do_brcond_true;
189
}
190
}
191
break;
192
193
case INDEX_op_setcond2_i32:
194
- tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
195
- op->args[5]);
196
- if (tmp != 2) {
197
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3],
198
+ op->args[5]);
199
+ if (i >= 0) {
200
do_setcond_const:
201
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
202
+ tcg_opt_gen_movi(&ctx, op, op->args[0], i);
203
continue;
204
}
205
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
206
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
207
if (op->args[5] == TCG_COND_EQ) {
208
/* Simplify EQ comparisons where one of the pairs
209
can be simplified. */
210
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
211
- op->args[1], op->args[3],
212
- TCG_COND_EQ);
213
- if (tmp == 0) {
214
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
215
+ op->args[1], op->args[3],
216
+ TCG_COND_EQ);
217
+ if (i == 0) {
218
goto do_setcond_const;
219
- } else if (tmp == 1) {
220
+ } else if (i > 0) {
221
goto do_setcond_high;
222
}
223
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
224
- op->args[2], op->args[4],
225
- TCG_COND_EQ);
226
- if (tmp == 0) {
227
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
228
+ op->args[2], op->args[4],
229
+ TCG_COND_EQ);
230
+ if (i == 0) {
231
goto do_setcond_high;
232
- } else if (tmp != 1) {
233
+ } else if (i < 0) {
234
break;
235
}
236
do_setcond_low:
237
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
238
if (op->args[5] == TCG_COND_NE) {
239
/* Simplify NE comparisons where one of the pairs
240
can be simplified. */
241
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
242
- op->args[1], op->args[3],
243
- TCG_COND_NE);
244
- if (tmp == 0) {
245
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
246
+ op->args[1], op->args[3],
247
+ TCG_COND_NE);
248
+ if (i == 0) {
249
goto do_setcond_high;
250
- } else if (tmp == 1) {
251
+ } else if (i > 0) {
252
goto do_setcond_const;
253
}
254
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
255
- op->args[2], op->args[4],
256
- TCG_COND_NE);
257
- if (tmp == 0) {
258
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
259
+ op->args[2], op->args[4],
260
+ TCG_COND_NE);
261
+ if (i == 0) {
262
goto do_setcond_low;
263
- } else if (tmp == 1) {
264
+ } else if (i > 0) {
265
goto do_setcond_const;
266
}
267
}
268
--
269
2.25.1
270
271
Deleted patch
1
This will allow callers to tail call to these functions
2
and return true, indicating that processing is complete.
3
1
4
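A small invented example of the calling pattern this enables: because the gen helpers now return true, a fold routine can finish by forwarding that result from a single tail position.

    #include <stdbool.h>

    static bool gen_copy_sketch(int *dst, int src)
    {
        *dst = src;
        return true;                         /* "this op is fully handled" */
    }

    static bool fold_something_sketch(int *dst, int src, bool can_fold)
    {
        if (can_fold) {
            return gen_copy_sketch(dst, src);  /* tail call, result forwarded */
        }
        return false;                          /* caller keeps processing */
    }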
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/optimize.c | 9 +++++----
10
1 file changed, 5 insertions(+), 4 deletions(-)
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
17
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
18
}
19
20
-static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
21
+static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
22
{
23
TCGTemp *dst_ts = arg_temp(dst);
24
TCGTemp *src_ts = arg_temp(src);
25
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
26
27
if (ts_are_copies(dst_ts, src_ts)) {
28
tcg_op_remove(ctx->tcg, op);
29
- return;
30
+ return true;
31
}
32
33
reset_ts(dst_ts);
34
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
35
di->is_const = si->is_const;
36
di->val = si->val;
37
}
38
+ return true;
39
}
40
41
-static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
42
+static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
43
TCGArg dst, uint64_t val)
44
{
45
const TCGOpDef *def = &tcg_op_defs[op->opc];
46
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
47
/* Convert movi to mov with constant temp. */
48
tv = tcg_constant_internal(type, val);
49
init_ts_info(ctx, tv);
50
- tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
51
+ return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
52
}
53
54
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
55
--
56
2.25.1
57
58
Deleted patch
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 9 ++++++---
7
1 file changed, 6 insertions(+), 3 deletions(-)
8
1
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
14
uint64_t z_mask, partmask, affected, tmp;
15
TCGOpcode opc = op->opc;
16
const TCGOpDef *def;
17
+ bool done = false;
18
19
/* Calls are special. */
20
if (opc == INDEX_op_call) {
21
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
22
allocator where needed and possible. Also detect copies. */
23
switch (opc) {
24
CASE_OP_32_64_VEC(mov):
25
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
26
- continue;
27
+ done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
28
+ break;
29
30
case INDEX_op_dup_vec:
31
if (arg_is_const(op->args[1])) {
32
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
33
break;
34
}
35
36
- finish_folding(&ctx, op);
37
+ if (!done) {
38
+ finish_folding(&ctx, op);
39
+ }
40
41
/* Eliminate duplicate and redundant fence instructions. */
42
if (ctx.prev_mb) {
43
--
44
2.25.1
45
46
Deleted patch
1
This puts the separate mb optimization into the same framework
2
as the others. While fold_qemu_{ld,st} are currently identical,
3
that won't last as more code gets moved.
4
1
5
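A standalone sketch of the barrier rule now living in fold_mb; the function name below is invented. Adjacent barriers combine by OR-ing their type bits (mb X; mb Y => mb X|Y), and any op that touches guest memory closes the window so merging starts over.

    /* Merge the pending barrier with a newly seen one. */
    static unsigned merge_barrier_sketch(unsigned prev_flags, unsigned new_flags)
    {
        return prev_flags | new_flags;   /* mb X; mb Y  =>  mb X|Y */
    }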
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/optimize.c | 89 +++++++++++++++++++++++++++++---------------------
10
1 file changed, 51 insertions(+), 38 deletions(-)
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
17
return true;
18
}
19
20
+static bool fold_mb(OptContext *ctx, TCGOp *op)
21
+{
22
+ /* Eliminate duplicate and redundant fence instructions. */
23
+ if (ctx->prev_mb) {
24
+ /*
25
+ * Merge two barriers of the same type into one,
26
+ * or a weaker barrier into a stronger one,
27
+ * or two weaker barriers into a stronger one.
28
+ * mb X; mb Y => mb X|Y
29
+ * mb; strl => mb; st
30
+ * ldaq; mb => ld; mb
31
+ * ldaq; strl => ld; mb; st
32
+ * Other combinations are also merged into a strong
33
+ * barrier. This is stricter than specified but for
34
+ * the purposes of TCG is better than not optimizing.
35
+ */
36
+ ctx->prev_mb->args[0] |= op->args[0];
37
+ tcg_op_remove(ctx->tcg, op);
38
+ } else {
39
+ ctx->prev_mb = op;
40
+ }
41
+ return true;
42
+}
43
+
44
+static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
45
+{
46
+ /* Opcodes that touch guest memory stop the mb optimization. */
47
+ ctx->prev_mb = NULL;
48
+ return false;
49
+}
50
+
51
+static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
52
+{
53
+ /* Opcodes that touch guest memory stop the mb optimization. */
54
+ ctx->prev_mb = NULL;
55
+ return false;
56
+}
57
+
58
/* Propagate constants and copies, fold constant expressions. */
59
void tcg_optimize(TCGContext *s)
60
{
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
}
63
break;
64
65
+ case INDEX_op_mb:
66
+ done = fold_mb(&ctx, op);
67
+ break;
68
+ case INDEX_op_qemu_ld_i32:
69
+ case INDEX_op_qemu_ld_i64:
70
+ done = fold_qemu_ld(&ctx, op);
71
+ break;
72
+ case INDEX_op_qemu_st_i32:
73
+ case INDEX_op_qemu_st8_i32:
74
+ case INDEX_op_qemu_st_i64:
75
+ done = fold_qemu_st(&ctx, op);
76
+ break;
77
+
78
default:
79
break;
80
}
81
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
82
if (!done) {
83
finish_folding(&ctx, op);
84
}
85
-
86
- /* Eliminate duplicate and redundant fence instructions. */
87
- if (ctx.prev_mb) {
88
- switch (opc) {
89
- case INDEX_op_mb:
90
- /* Merge two barriers of the same type into one,
91
- * or a weaker barrier into a stronger one,
92
- * or two weaker barriers into a stronger one.
93
- * mb X; mb Y => mb X|Y
94
- * mb; strl => mb; st
95
- * ldaq; mb => ld; mb
96
- * ldaq; strl => ld; mb; st
97
- * Other combinations are also merged into a strong
98
- * barrier. This is stricter than specified but for
99
- * the purposes of TCG is better than not optimizing.
100
- */
101
- ctx.prev_mb->args[0] |= op->args[0];
102
- tcg_op_remove(s, op);
103
- break;
104
-
105
- default:
106
- /* Opcodes that end the block stop the optimization. */
107
- if ((def->flags & TCG_OPF_BB_END) == 0) {
108
- break;
109
- }
110
- /* fallthru */
111
- case INDEX_op_qemu_ld_i32:
112
- case INDEX_op_qemu_ld_i64:
113
- case INDEX_op_qemu_st_i32:
114
- case INDEX_op_qemu_st8_i32:
115
- case INDEX_op_qemu_st_i64:
116
- /* Opcodes that touch guest memory stop the optimization. */
117
- ctx.prev_mb = NULL;
118
- break;
119
- }
120
- } else if (opc == INDEX_op_mb) {
121
- ctx.prev_mb = op;
122
- }
123
}
124
}
125
--
126
2.25.1
127
128
Deleted patch
1
Split out a whole bunch of placeholder functions, which are
2
currently identical. That won't last as more code gets moved.
3
1
4
Use CASE_32_64_VEC for some logical operators that previously
5
missed the addition of vectors.
6
7
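For illustration, an invented macro of the same shape as the CASE_OP_32_64_VEC used below, showing how one dispatch arm can cover the 32-bit, 64-bit and vector variants of an opcode; the enum and all names are made up for the sketch.

    #define CASE_32_64_VEC_SKETCH(name) \
        case OP_##name##_i32:           \
        case OP_##name##_i64:           \
        case OP_##name##_vec

    enum { OP_and_i32, OP_and_i64, OP_and_vec, OP_other };

    static int classify_sketch(int opc)
    {
        switch (opc) {
        CASE_32_64_VEC_SKETCH(and):
            return 1;        /* all three variants share one handler */
        default:
            return 0;
        }
    }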
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
11
tcg/optimize.c | 271 +++++++++++++++++++++++++++++++++++++++----------
12
1 file changed, 219 insertions(+), 52 deletions(-)
13
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
17
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
19
}
20
}
21
22
+/*
23
+ * The fold_* functions return true when processing is complete,
24
+ * usually by folding the operation to a constant or to a copy,
25
+ * and calling tcg_opt_gen_{mov,movi}. They may do other things,
26
+ * like collect information about the value produced, for use in
27
+ * optimizing a subsequent operation.
28
+ *
29
+ * These first fold_* functions are all helpers, used by other
30
+ * folders for more specific operations.
31
+ */
32
+
33
+static bool fold_const1(OptContext *ctx, TCGOp *op)
34
+{
35
+ if (arg_is_const(op->args[1])) {
36
+ uint64_t t;
37
+
38
+ t = arg_info(op->args[1])->val;
39
+ t = do_constant_folding(op->opc, t, 0);
40
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
41
+ }
42
+ return false;
43
+}
44
+
45
+static bool fold_const2(OptContext *ctx, TCGOp *op)
46
+{
47
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
48
+ uint64_t t1 = arg_info(op->args[1])->val;
49
+ uint64_t t2 = arg_info(op->args[2])->val;
50
+
51
+ t1 = do_constant_folding(op->opc, t1, t2);
52
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
53
+ }
54
+ return false;
55
+}
56
+
57
+/*
58
+ * These outermost fold_<op> functions are sorted alphabetically.
59
+ */
60
+
61
+static bool fold_add(OptContext *ctx, TCGOp *op)
62
+{
63
+ return fold_const2(ctx, op);
64
+}
65
+
66
+static bool fold_and(OptContext *ctx, TCGOp *op)
67
+{
68
+ return fold_const2(ctx, op);
69
+}
70
+
71
+static bool fold_andc(OptContext *ctx, TCGOp *op)
72
+{
73
+ return fold_const2(ctx, op);
74
+}
75
+
76
static bool fold_call(OptContext *ctx, TCGOp *op)
77
{
78
TCGContext *s = ctx->tcg;
79
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
80
return true;
81
}
82
83
+static bool fold_ctpop(OptContext *ctx, TCGOp *op)
84
+{
85
+ return fold_const1(ctx, op);
86
+}
87
+
88
+static bool fold_divide(OptContext *ctx, TCGOp *op)
89
+{
90
+ return fold_const2(ctx, op);
91
+}
92
+
93
+static bool fold_eqv(OptContext *ctx, TCGOp *op)
94
+{
95
+ return fold_const2(ctx, op);
96
+}
97
+
98
+static bool fold_exts(OptContext *ctx, TCGOp *op)
99
+{
100
+ return fold_const1(ctx, op);
101
+}
102
+
103
+static bool fold_extu(OptContext *ctx, TCGOp *op)
104
+{
105
+ return fold_const1(ctx, op);
106
+}
107
+
108
static bool fold_mb(OptContext *ctx, TCGOp *op)
109
{
110
/* Eliminate duplicate and redundant fence instructions. */
111
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
112
return true;
113
}
114
115
+static bool fold_mul(OptContext *ctx, TCGOp *op)
116
+{
117
+ return fold_const2(ctx, op);
118
+}
119
+
120
+static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
121
+{
122
+ return fold_const2(ctx, op);
123
+}
124
+
125
+static bool fold_nand(OptContext *ctx, TCGOp *op)
126
+{
127
+ return fold_const2(ctx, op);
128
+}
129
+
130
+static bool fold_neg(OptContext *ctx, TCGOp *op)
131
+{
132
+ return fold_const1(ctx, op);
133
+}
134
+
135
+static bool fold_nor(OptContext *ctx, TCGOp *op)
136
+{
137
+ return fold_const2(ctx, op);
138
+}
139
+
140
+static bool fold_not(OptContext *ctx, TCGOp *op)
141
+{
142
+ return fold_const1(ctx, op);
143
+}
144
+
145
+static bool fold_or(OptContext *ctx, TCGOp *op)
146
+{
147
+ return fold_const2(ctx, op);
148
+}
149
+
150
+static bool fold_orc(OptContext *ctx, TCGOp *op)
151
+{
152
+ return fold_const2(ctx, op);
153
+}
154
+
155
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
156
{
157
/* Opcodes that touch guest memory stop the mb optimization. */
158
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
159
return false;
160
}
161
162
+static bool fold_remainder(OptContext *ctx, TCGOp *op)
163
+{
164
+ return fold_const2(ctx, op);
165
+}
166
+
167
+static bool fold_shift(OptContext *ctx, TCGOp *op)
168
+{
169
+ return fold_const2(ctx, op);
170
+}
171
+
172
+static bool fold_sub(OptContext *ctx, TCGOp *op)
173
+{
174
+ return fold_const2(ctx, op);
175
+}
176
+
177
+static bool fold_xor(OptContext *ctx, TCGOp *op)
178
+{
179
+ return fold_const2(ctx, op);
180
+}
181
+
182
/* Propagate constants and copies, fold constant expressions. */
183
void tcg_optimize(TCGContext *s)
184
{
185
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
186
}
187
break;
188
189
- CASE_OP_32_64(not):
190
- CASE_OP_32_64(neg):
191
- CASE_OP_32_64(ext8s):
192
- CASE_OP_32_64(ext8u):
193
- CASE_OP_32_64(ext16s):
194
- CASE_OP_32_64(ext16u):
195
- CASE_OP_32_64(ctpop):
196
- case INDEX_op_ext32s_i64:
197
- case INDEX_op_ext32u_i64:
198
- case INDEX_op_ext_i32_i64:
199
- case INDEX_op_extu_i32_i64:
200
- case INDEX_op_extrl_i64_i32:
201
- case INDEX_op_extrh_i64_i32:
202
- if (arg_is_const(op->args[1])) {
203
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
204
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
205
- continue;
206
- }
207
- break;
208
-
209
CASE_OP_32_64(bswap16):
210
CASE_OP_32_64(bswap32):
211
case INDEX_op_bswap64_i64:
212
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
213
}
214
break;
215
216
- CASE_OP_32_64(add):
217
- CASE_OP_32_64(sub):
218
- CASE_OP_32_64(mul):
219
- CASE_OP_32_64(or):
220
- CASE_OP_32_64(and):
221
- CASE_OP_32_64(xor):
222
- CASE_OP_32_64(shl):
223
- CASE_OP_32_64(shr):
224
- CASE_OP_32_64(sar):
225
- CASE_OP_32_64(rotl):
226
- CASE_OP_32_64(rotr):
227
- CASE_OP_32_64(andc):
228
- CASE_OP_32_64(orc):
229
- CASE_OP_32_64(eqv):
230
- CASE_OP_32_64(nand):
231
- CASE_OP_32_64(nor):
232
- CASE_OP_32_64(muluh):
233
- CASE_OP_32_64(mulsh):
234
- CASE_OP_32_64(div):
235
- CASE_OP_32_64(divu):
236
- CASE_OP_32_64(rem):
237
- CASE_OP_32_64(remu):
238
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
239
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
240
- arg_info(op->args[2])->val);
241
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
242
- continue;
243
- }
244
- break;
245
-
246
CASE_OP_32_64(clz):
247
CASE_OP_32_64(ctz):
248
if (arg_is_const(op->args[1])) {
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
250
}
251
break;
252
253
+ default:
254
+ break;
255
+
256
+ /* ---------------------------------------------------------- */
257
+ /* Sorted alphabetically by opcode as much as possible. */
258
+
259
+ CASE_OP_32_64_VEC(add):
260
+ done = fold_add(&ctx, op);
261
+ break;
262
+ CASE_OP_32_64_VEC(and):
263
+ done = fold_and(&ctx, op);
264
+ break;
265
+ CASE_OP_32_64_VEC(andc):
266
+ done = fold_andc(&ctx, op);
267
+ break;
268
+ CASE_OP_32_64(ctpop):
269
+ done = fold_ctpop(&ctx, op);
270
+ break;
271
+ CASE_OP_32_64(div):
272
+ CASE_OP_32_64(divu):
273
+ done = fold_divide(&ctx, op);
274
+ break;
275
+ CASE_OP_32_64(eqv):
276
+ done = fold_eqv(&ctx, op);
277
+ break;
278
+ CASE_OP_32_64(ext8s):
279
+ CASE_OP_32_64(ext16s):
280
+ case INDEX_op_ext32s_i64:
281
+ case INDEX_op_ext_i32_i64:
282
+ done = fold_exts(&ctx, op);
283
+ break;
284
+ CASE_OP_32_64(ext8u):
285
+ CASE_OP_32_64(ext16u):
286
+ case INDEX_op_ext32u_i64:
287
+ case INDEX_op_extu_i32_i64:
288
+ case INDEX_op_extrl_i64_i32:
289
+ case INDEX_op_extrh_i64_i32:
290
+ done = fold_extu(&ctx, op);
291
+ break;
292
case INDEX_op_mb:
293
done = fold_mb(&ctx, op);
294
break;
295
+ CASE_OP_32_64(mul):
296
+ done = fold_mul(&ctx, op);
297
+ break;
298
+ CASE_OP_32_64(mulsh):
299
+ CASE_OP_32_64(muluh):
300
+ done = fold_mul_highpart(&ctx, op);
301
+ break;
302
+ CASE_OP_32_64(nand):
303
+ done = fold_nand(&ctx, op);
304
+ break;
305
+ CASE_OP_32_64(neg):
306
+ done = fold_neg(&ctx, op);
307
+ break;
308
+ CASE_OP_32_64(nor):
309
+ done = fold_nor(&ctx, op);
310
+ break;
311
+ CASE_OP_32_64_VEC(not):
312
+ done = fold_not(&ctx, op);
313
+ break;
314
+ CASE_OP_32_64_VEC(or):
315
+ done = fold_or(&ctx, op);
316
+ break;
317
+ CASE_OP_32_64_VEC(orc):
318
+ done = fold_orc(&ctx, op);
319
+ break;
320
case INDEX_op_qemu_ld_i32:
321
case INDEX_op_qemu_ld_i64:
322
done = fold_qemu_ld(&ctx, op);
323
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
324
case INDEX_op_qemu_st_i64:
325
done = fold_qemu_st(&ctx, op);
326
break;
327
-
328
- default:
329
+ CASE_OP_32_64(rem):
330
+ CASE_OP_32_64(remu):
331
+ done = fold_remainder(&ctx, op);
332
+ break;
333
+ CASE_OP_32_64(rotl):
334
+ CASE_OP_32_64(rotr):
335
+ CASE_OP_32_64(sar):
336
+ CASE_OP_32_64(shl):
337
+ CASE_OP_32_64(shr):
338
+ done = fold_shift(&ctx, op);
339
+ break;
340
+ CASE_OP_32_64_VEC(sub):
341
+ done = fold_sub(&ctx, op);
342
+ break;
343
+ CASE_OP_32_64_VEC(xor):
344
+ done = fold_xor(&ctx, op);
345
break;
346
}
347
348
--
349
2.25.1
350
351
Deleted patch
1
Reduce some code duplication by folding the NE and EQ cases.
2
1
3
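A hedged sketch of the trick used to share the EQ and NE bodies (names invented): NE sets inv = 1, and XOR-ing each partial folding result with inv maps the NE outcomes onto the same switch cases as EQ.

    /* partial_result is -1 (unknown), 0 or 1 for one half of the pair. */
    static int fold_eq_ne_sketch(int cond_is_ne, int partial_result)
    {
        int inv = cond_is_ne ? 1 : 0;

        switch (partial_result ^ inv) {
        case 0:
            return 0;    /* the whole comparison folds to a constant */
        case 1:
            return 1;    /* only the other half still needs comparing */
        default:
            return -1;   /* nothing known, keep the original op */
        }
    }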
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 145 ++++++++++++++++++++++++-------------------------
8
1 file changed, 72 insertions(+), 73 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
15
return fold_const2(ctx, op);
16
}
17
18
+static bool fold_setcond2(OptContext *ctx, TCGOp *op)
19
+{
20
+ TCGCond cond = op->args[5];
21
+ int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
22
+ int inv = 0;
23
+
24
+ if (i >= 0) {
25
+ goto do_setcond_const;
26
+ }
27
+
28
+ switch (cond) {
29
+ case TCG_COND_LT:
30
+ case TCG_COND_GE:
31
+ /*
32
+ * Simplify LT/GE comparisons vs zero to a single compare
33
+ * vs the high word of the input.
34
+ */
35
+ if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
36
+ arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
37
+ goto do_setcond_high;
38
+ }
39
+ break;
40
+
41
+ case TCG_COND_NE:
42
+ inv = 1;
43
+ QEMU_FALLTHROUGH;
44
+ case TCG_COND_EQ:
45
+ /*
46
+ * Simplify EQ/NE comparisons where one of the pairs
47
+ * can be simplified.
48
+ */
49
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
50
+ op->args[3], cond);
51
+ switch (i ^ inv) {
52
+ case 0:
53
+ goto do_setcond_const;
54
+ case 1:
55
+ goto do_setcond_high;
56
+ }
57
+
58
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
59
+ op->args[4], cond);
60
+ switch (i ^ inv) {
61
+ case 0:
62
+ goto do_setcond_const;
63
+ case 1:
64
+ op->args[2] = op->args[3];
65
+ op->args[3] = cond;
66
+ op->opc = INDEX_op_setcond_i32;
67
+ break;
68
+ }
69
+ break;
70
+
71
+ default:
72
+ break;
73
+
74
+ do_setcond_high:
75
+ op->args[1] = op->args[2];
76
+ op->args[2] = op->args[4];
77
+ op->args[3] = cond;
78
+ op->opc = INDEX_op_setcond_i32;
79
+ break;
80
+ }
81
+ return false;
82
+
83
+ do_setcond_const:
84
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
85
+}
86
+
87
static bool fold_shift(OptContext *ctx, TCGOp *op)
88
{
89
return fold_const2(ctx, op);
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
91
}
92
break;
93
94
- case INDEX_op_setcond2_i32:
95
- i = do_constant_folding_cond2(&op->args[1], &op->args[3],
96
- op->args[5]);
97
- if (i >= 0) {
98
- do_setcond_const:
99
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
100
- continue;
101
- }
102
- if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
103
- && arg_is_const(op->args[3])
104
- && arg_info(op->args[3])->val == 0
105
- && arg_is_const(op->args[4])
106
- && arg_info(op->args[4])->val == 0) {
107
- /* Simplify LT/GE comparisons vs zero to a single compare
108
- vs the high word of the input. */
109
- do_setcond_high:
110
- reset_temp(op->args[0]);
111
- arg_info(op->args[0])->z_mask = 1;
112
- op->opc = INDEX_op_setcond_i32;
113
- op->args[1] = op->args[2];
114
- op->args[2] = op->args[4];
115
- op->args[3] = op->args[5];
116
- break;
117
- }
118
- if (op->args[5] == TCG_COND_EQ) {
119
- /* Simplify EQ comparisons where one of the pairs
120
- can be simplified. */
121
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
122
- op->args[1], op->args[3],
123
- TCG_COND_EQ);
124
- if (i == 0) {
125
- goto do_setcond_const;
126
- } else if (i > 0) {
127
- goto do_setcond_high;
128
- }
129
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
130
- op->args[2], op->args[4],
131
- TCG_COND_EQ);
132
- if (i == 0) {
133
- goto do_setcond_high;
134
- } else if (i < 0) {
135
- break;
136
- }
137
- do_setcond_low:
138
- reset_temp(op->args[0]);
139
- arg_info(op->args[0])->z_mask = 1;
140
- op->opc = INDEX_op_setcond_i32;
141
- op->args[2] = op->args[3];
142
- op->args[3] = op->args[5];
143
- break;
144
- }
145
- if (op->args[5] == TCG_COND_NE) {
146
- /* Simplify NE comparisons where one of the pairs
147
- can be simplified. */
148
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
149
- op->args[1], op->args[3],
150
- TCG_COND_NE);
151
- if (i == 0) {
152
- goto do_setcond_high;
153
- } else if (i > 0) {
154
- goto do_setcond_const;
155
- }
156
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
157
- op->args[2], op->args[4],
158
- TCG_COND_NE);
159
- if (i == 0) {
160
- goto do_setcond_low;
161
- } else if (i > 0) {
162
- goto do_setcond_const;
163
- }
164
- }
165
- break;
166
-
167
default:
168
break;
169
170
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
171
CASE_OP_32_64(shr):
172
done = fold_shift(&ctx, op);
173
break;
174
+ case INDEX_op_setcond2_i32:
175
+ done = fold_setcond2(&ctx, op);
176
+ break;
177
CASE_OP_32_64_VEC(sub):
178
done = fold_sub(&ctx, op);
179
break;
180
--
181
2.25.1
182
183
diff view generated by jsdifflib
Deleted patch
1
Reduce some code duplication by folding the NE and EQ cases.
2
1
3
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 159 +++++++++++++++++++++++++------------------------
7
1 file changed, 81 insertions(+), 78 deletions(-)
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
14
return fold_const2(ctx, op);
15
}
16
17
+static bool fold_brcond2(OptContext *ctx, TCGOp *op)
18
+{
19
+ TCGCond cond = op->args[4];
20
+ int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
21
+ TCGArg label = op->args[5];
22
+ int inv = 0;
23
+
24
+ if (i >= 0) {
25
+ goto do_brcond_const;
26
+ }
27
+
28
+ switch (cond) {
29
+ case TCG_COND_LT:
30
+ case TCG_COND_GE:
31
+ /*
32
+ * Simplify LT/GE comparisons vs zero to a single compare
33
+ * vs the high word of the input.
34
+ */
35
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
36
+ arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
37
+ goto do_brcond_high;
38
+ }
39
+ break;
40
+
41
+ case TCG_COND_NE:
42
+ inv = 1;
43
+ QEMU_FALLTHROUGH;
44
+ case TCG_COND_EQ:
45
+ /*
46
+ * Simplify EQ/NE comparisons where one of the pairs
47
+ * can be simplified.
48
+ */
49
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
50
+ op->args[2], cond);
51
+ switch (i ^ inv) {
52
+ case 0:
53
+ goto do_brcond_const;
54
+ case 1:
55
+ goto do_brcond_high;
56
+ }
57
+
58
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
59
+ op->args[3], cond);
60
+ switch (i ^ inv) {
61
+ case 0:
62
+ goto do_brcond_const;
63
+ case 1:
64
+ op->opc = INDEX_op_brcond_i32;
65
+ op->args[1] = op->args[2];
66
+ op->args[2] = cond;
67
+ op->args[3] = label;
68
+ break;
69
+ }
70
+ break;
71
+
72
+ default:
73
+ break;
74
+
75
+ do_brcond_high:
76
+ op->opc = INDEX_op_brcond_i32;
77
+ op->args[0] = op->args[1];
78
+ op->args[1] = op->args[3];
79
+ op->args[2] = cond;
80
+ op->args[3] = label;
81
+ break;
82
+
83
+ do_brcond_const:
84
+ if (i == 0) {
85
+ tcg_op_remove(ctx->tcg, op);
86
+ return true;
87
+ }
88
+ op->opc = INDEX_op_br;
89
+ op->args[0] = label;
90
+ break;
91
+ }
92
+ return false;
93
+}
94
+
95
static bool fold_call(OptContext *ctx, TCGOp *op)
96
{
97
TCGContext *s = ctx->tcg;
98
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
99
}
100
break;
101
102
- case INDEX_op_brcond2_i32:
103
- i = do_constant_folding_cond2(&op->args[0], &op->args[2],
104
- op->args[4]);
105
- if (i == 0) {
106
- do_brcond_false:
107
- tcg_op_remove(s, op);
108
- continue;
109
- }
110
- if (i > 0) {
111
- do_brcond_true:
112
- op->opc = opc = INDEX_op_br;
113
- op->args[0] = op->args[5];
114
- break;
115
- }
116
- if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
117
- && arg_is_const(op->args[2])
118
- && arg_info(op->args[2])->val == 0
119
- && arg_is_const(op->args[3])
120
- && arg_info(op->args[3])->val == 0) {
121
- /* Simplify LT/GE comparisons vs zero to a single compare
122
- vs the high word of the input. */
123
- do_brcond_high:
124
- op->opc = opc = INDEX_op_brcond_i32;
125
- op->args[0] = op->args[1];
126
- op->args[1] = op->args[3];
127
- op->args[2] = op->args[4];
128
- op->args[3] = op->args[5];
129
- break;
130
- }
131
- if (op->args[4] == TCG_COND_EQ) {
132
- /* Simplify EQ comparisons where one of the pairs
133
- can be simplified. */
134
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
135
- op->args[0], op->args[2],
136
- TCG_COND_EQ);
137
- if (i == 0) {
138
- goto do_brcond_false;
139
- } else if (i > 0) {
140
- goto do_brcond_high;
141
- }
142
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
143
- op->args[1], op->args[3],
144
- TCG_COND_EQ);
145
- if (i == 0) {
146
- goto do_brcond_false;
147
- } else if (i < 0) {
148
- break;
149
- }
150
- do_brcond_low:
151
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
152
- op->opc = INDEX_op_brcond_i32;
153
- op->args[1] = op->args[2];
154
- op->args[2] = op->args[4];
155
- op->args[3] = op->args[5];
156
- break;
157
- }
158
- if (op->args[4] == TCG_COND_NE) {
159
- /* Simplify NE comparisons where one of the pairs
160
- can be simplified. */
161
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
162
- op->args[0], op->args[2],
163
- TCG_COND_NE);
164
- if (i == 0) {
165
- goto do_brcond_high;
166
- } else if (i > 0) {
167
- goto do_brcond_true;
168
- }
169
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
170
- op->args[1], op->args[3],
171
- TCG_COND_NE);
172
- if (i == 0) {
173
- goto do_brcond_low;
174
- } else if (i > 0) {
175
- goto do_brcond_true;
176
- }
177
- }
178
- break;
179
-
180
default:
181
break;
182
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
CASE_OP_32_64_VEC(andc):
185
done = fold_andc(&ctx, op);
186
break;
187
+ case INDEX_op_brcond2_i32:
188
+ done = fold_brcond2(&ctx, op);
189
+ break;
190
CASE_OP_32_64(ctpop):
191
done = fold_ctpop(&ctx, op);
192
break;
193
--
194
2.25.1
195
196
Deleted patch
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tcg/optimize.c | 33 +++++++++++++++++++--------------
6
1 file changed, 19 insertions(+), 14 deletions(-)
7
1
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
13
return fold_const2(ctx, op);
14
}
15
16
+static bool fold_brcond(OptContext *ctx, TCGOp *op)
17
+{
18
+ TCGCond cond = op->args[2];
19
+ int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
20
+
21
+ if (i == 0) {
22
+ tcg_op_remove(ctx->tcg, op);
23
+ return true;
24
+ }
25
+ if (i > 0) {
26
+ op->opc = INDEX_op_br;
27
+ op->args[0] = op->args[3];
28
+ }
29
+ return false;
30
+}
31
+
32
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
33
{
34
TCGCond cond = op->args[4];
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
36
}
37
break;
38
39
- CASE_OP_32_64(brcond):
40
- i = do_constant_folding_cond(opc, op->args[0],
41
- op->args[1], op->args[2]);
42
- if (i == 0) {
43
- tcg_op_remove(s, op);
44
- continue;
45
- } else if (i > 0) {
46
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
47
- op->opc = opc = INDEX_op_br;
48
- op->args[0] = op->args[3];
49
- break;
50
- }
51
- break;
52
-
53
CASE_OP_32_64(movcond):
54
i = do_constant_folding_cond(opc, op->args[1],
55
op->args[2], op->args[5]);
56
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
57
CASE_OP_32_64_VEC(andc):
58
done = fold_andc(&ctx, op);
59
break;
60
+ CASE_OP_32_64(brcond):
61
+ done = fold_brcond(&ctx, op);
62
+ break;
63
case INDEX_op_brcond2_i32:
64
done = fold_brcond2(&ctx, op);
65
break;
66
--
67
2.25.1
68
69
Deleted patch
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tcg/optimize.c | 23 ++++++++++++++---------
6
1 file changed, 14 insertions(+), 9 deletions(-)
7
1
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
13
return fold_const2(ctx, op);
14
}
15
16
+static bool fold_setcond(OptContext *ctx, TCGOp *op)
17
+{
18
+ TCGCond cond = op->args[3];
19
+ int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
20
+
21
+ if (i >= 0) {
22
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
23
+ }
24
+ return false;
25
+}
26
+
27
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
28
{
29
TCGCond cond = op->args[5];
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
31
}
32
break;
33
34
- CASE_OP_32_64(setcond):
35
- i = do_constant_folding_cond(opc, op->args[1],
36
- op->args[2], op->args[3]);
37
- if (i >= 0) {
38
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
39
- continue;
40
- }
41
- break;
42
-
43
CASE_OP_32_64(movcond):
44
i = do_constant_folding_cond(opc, op->args[1],
45
op->args[2], op->args[5]);
46
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
47
CASE_OP_32_64(shr):
48
done = fold_shift(&ctx, op);
49
break;
50
+ CASE_OP_32_64(setcond):
51
+ done = fold_setcond(&ctx, op);
52
+ break;
53
case INDEX_op_setcond2_i32:
54
done = fold_setcond2(&ctx, op);
55
break;
56
--
57
2.25.1
58
59
Deleted patch
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tcg/optimize.c | 37 +++++++++++++++++++++----------------
6
1 file changed, 21 insertions(+), 16 deletions(-)
7
1
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
13
return fold_const2(ctx, op);
14
}
15
16
+static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
17
+{
18
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
19
+ uint32_t a = arg_info(op->args[2])->val;
20
+ uint32_t b = arg_info(op->args[3])->val;
21
+ uint64_t r = (uint64_t)a * b;
22
+ TCGArg rl, rh;
23
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
24
+
25
+ rl = op->args[0];
26
+ rh = op->args[1];
27
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
28
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
29
+ return true;
30
+ }
31
+ return false;
32
+}
33
+
34
static bool fold_nand(OptContext *ctx, TCGOp *op)
35
{
36
return fold_const2(ctx, op);
37
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
38
}
39
break;
40
41
- case INDEX_op_mulu2_i32:
42
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
43
- uint32_t a = arg_info(op->args[2])->val;
44
- uint32_t b = arg_info(op->args[3])->val;
45
- uint64_t r = (uint64_t)a * b;
46
- TCGArg rl, rh;
47
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
48
-
49
- rl = op->args[0];
50
- rh = op->args[1];
51
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
52
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
53
- continue;
54
- }
55
- break;
56
-
57
default:
58
break;
59
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
CASE_OP_32_64(muluh):
62
done = fold_mul_highpart(&ctx, op);
63
break;
64
+ case INDEX_op_mulu2_i32:
65
+ done = fold_mulu2_i32(&ctx, op);
66
+ break;
67
CASE_OP_32_64(nand):
68
done = fold_nand(&ctx, op);
69
break;
70
--
71
2.25.1
72
73
Deleted patch
1
Add two additional helpers, fold_add2_i32 and fold_sub2_i32
2
which will not be simple wrappers forever.
3
1
4
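The folding itself reduces to plain double-word arithmetic; a standalone sketch with invented names, mirroring the shape of the shared helper introduced below.

    #include <stdint.h>

    /* Fold a 32-bit pair add/sub at compile time: assemble the two
     * 64-bit values, do the arithmetic once, split the result back. */
    static void fold_addsub2_sketch(uint32_t al, uint32_t ah,
                                    uint32_t bl, uint32_t bh, int add,
                                    uint32_t *rl, uint32_t *rh)
    {
        uint64_t a = ((uint64_t)ah << 32) | al;
        uint64_t b = ((uint64_t)bh << 32) | bl;

        a = add ? a + b : a - b;
        *rl = (uint32_t)a;
        *rh = (uint32_t)(a >> 32);
    }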
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 70 +++++++++++++++++++++++++++++++-------------------
9
1 file changed, 44 insertions(+), 26 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
16
return fold_const2(ctx, op);
17
}
18
19
+static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
20
+{
21
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
22
+ arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
23
+ uint32_t al = arg_info(op->args[2])->val;
24
+ uint32_t ah = arg_info(op->args[3])->val;
25
+ uint32_t bl = arg_info(op->args[4])->val;
26
+ uint32_t bh = arg_info(op->args[5])->val;
27
+ uint64_t a = ((uint64_t)ah << 32) | al;
28
+ uint64_t b = ((uint64_t)bh << 32) | bl;
29
+ TCGArg rl, rh;
30
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
31
+
32
+ if (add) {
33
+ a += b;
34
+ } else {
35
+ a -= b;
36
+ }
37
+
38
+ rl = op->args[0];
39
+ rh = op->args[1];
40
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
41
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
42
+ return true;
43
+ }
44
+ return false;
45
+}
46
+
47
+static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
48
+{
49
+ return fold_addsub2_i32(ctx, op, true);
50
+}
51
+
52
static bool fold_and(OptContext *ctx, TCGOp *op)
53
{
54
return fold_const2(ctx, op);
55
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
56
return fold_const2(ctx, op);
57
}
58
59
+static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
60
+{
61
+ return fold_addsub2_i32(ctx, op, false);
62
+}
63
+
64
static bool fold_xor(OptContext *ctx, TCGOp *op)
65
{
66
return fold_const2(ctx, op);
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
}
69
break;
70
71
- case INDEX_op_add2_i32:
72
- case INDEX_op_sub2_i32:
73
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
74
- && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
75
- uint32_t al = arg_info(op->args[2])->val;
76
- uint32_t ah = arg_info(op->args[3])->val;
77
- uint32_t bl = arg_info(op->args[4])->val;
78
- uint32_t bh = arg_info(op->args[5])->val;
79
- uint64_t a = ((uint64_t)ah << 32) | al;
80
- uint64_t b = ((uint64_t)bh << 32) | bl;
81
- TCGArg rl, rh;
82
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
83
-
84
- if (opc == INDEX_op_add2_i32) {
85
- a += b;
86
- } else {
87
- a -= b;
88
- }
89
-
90
- rl = op->args[0];
91
- rh = op->args[1];
92
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
93
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
94
- continue;
95
- }
96
- break;
97
98
default:
99
break;
100
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
101
CASE_OP_32_64_VEC(add):
102
done = fold_add(&ctx, op);
103
break;
104
+ case INDEX_op_add2_i32:
105
+ done = fold_add2_i32(&ctx, op);
106
+ break;
107
CASE_OP_32_64_VEC(and):
108
done = fold_and(&ctx, op);
109
break;
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
CASE_OP_32_64_VEC(sub):
112
done = fold_sub(&ctx, op);
113
break;
114
+ case INDEX_op_sub2_i32:
115
+ done = fold_sub2_i32(&ctx, op);
116
+ break;
117
CASE_OP_32_64_VEC(xor):
118
done = fold_xor(&ctx, op);
119
break;
120
--
121
2.25.1
122
123
Deleted patch
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tcg/optimize.c | 56 ++++++++++++++++++++++++++++----------------------
6
1 file changed, 31 insertions(+), 25 deletions(-)
7
1
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
13
return true;
14
}
15
16
+static bool fold_movcond(OptContext *ctx, TCGOp *op)
17
+{
18
+ TCGOpcode opc = op->opc;
19
+ TCGCond cond = op->args[5];
20
+ int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
21
+
22
+ if (i >= 0) {
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
24
+ }
25
+
26
+ if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
27
+ uint64_t tv = arg_info(op->args[3])->val;
28
+ uint64_t fv = arg_info(op->args[4])->val;
29
+
30
+ opc = (opc == INDEX_op_movcond_i32
31
+ ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
32
+
33
+ if (tv == 1 && fv == 0) {
34
+ op->opc = opc;
35
+ op->args[3] = cond;
36
+ } else if (fv == 1 && tv == 0) {
37
+ op->opc = opc;
38
+ op->args[3] = tcg_invert_cond(cond);
39
+ }
40
+ }
41
+ return false;
42
+}
43
+
44
static bool fold_mul(OptContext *ctx, TCGOp *op)
45
{
46
return fold_const2(ctx, op);
47
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
48
}
49
break;
50
51
- CASE_OP_32_64(movcond):
52
- i = do_constant_folding_cond(opc, op->args[1],
53
- op->args[2], op->args[5]);
54
- if (i >= 0) {
55
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
56
- continue;
57
- }
58
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
59
- uint64_t tv = arg_info(op->args[3])->val;
60
- uint64_t fv = arg_info(op->args[4])->val;
61
- TCGCond cond = op->args[5];
62
-
63
- if (fv == 1 && tv == 0) {
64
- cond = tcg_invert_cond(cond);
65
- } else if (!(tv == 1 && fv == 0)) {
66
- break;
67
- }
68
- op->args[3] = cond;
69
- op->opc = opc = (opc == INDEX_op_movcond_i32
70
- ? INDEX_op_setcond_i32
71
- : INDEX_op_setcond_i64);
72
- }
73
- break;
74
-
75
-
76
default:
77
break;
78
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
case INDEX_op_mb:
81
done = fold_mb(&ctx, op);
82
break;
83
+ CASE_OP_32_64(movcond):
84
+ done = fold_movcond(&ctx, op);
85
+ break;
86
CASE_OP_32_64(mul):
87
done = fold_mul(&ctx, op);
88
break;
89
--
90
2.25.1
91
92
Deleted patch
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tcg/optimize.c | 39 ++++++++++++++++++++++-----------------
6
1 file changed, 22 insertions(+), 17 deletions(-)
7
1
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
13
return fold_const2(ctx, op);
14
}
15
16
+static bool fold_extract2(OptContext *ctx, TCGOp *op)
17
+{
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
19
+ uint64_t v1 = arg_info(op->args[1])->val;
20
+ uint64_t v2 = arg_info(op->args[2])->val;
21
+ int shr = op->args[3];
22
+
23
+ if (op->opc == INDEX_op_extract2_i64) {
24
+ v1 >>= shr;
25
+ v2 <<= 64 - shr;
26
+ } else {
27
+ v1 = (uint32_t)v1 >> shr;
28
+ v2 = (int32_t)v2 << (32 - shr);
29
+ }
30
+ return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
31
+ }
32
+ return false;
33
+}
34
+
35
static bool fold_exts(OptContext *ctx, TCGOp *op)
36
{
37
return fold_const1(ctx, op);
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
39
}
40
break;
41
42
- CASE_OP_32_64(extract2):
43
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
44
- uint64_t v1 = arg_info(op->args[1])->val;
45
- uint64_t v2 = arg_info(op->args[2])->val;
46
- int shr = op->args[3];
47
-
48
- if (opc == INDEX_op_extract2_i64) {
49
- tmp = (v1 >> shr) | (v2 << (64 - shr));
50
- } else {
51
- tmp = (int32_t)(((uint32_t)v1 >> shr) |
52
- ((uint32_t)v2 << (32 - shr)));
53
- }
54
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
55
- continue;
56
- }
57
- break;
58
-
59
default:
60
break;
61
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
63
CASE_OP_32_64(eqv):
64
done = fold_eqv(&ctx, op);
65
break;
66
+ CASE_OP_32_64(extract2):
67
+ done = fold_extract2(&ctx, op);
68
+ break;
69
CASE_OP_32_64(ext8s):
70
CASE_OP_32_64(ext16s):
71
case INDEX_op_ext32s_i64:
72
--
73
2.25.1
74
75
Deleted patch
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tcg/optimize.c | 48 ++++++++++++++++++++++++++++++------------------
6
1 file changed, 30 insertions(+), 18 deletions(-)
7
1
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
13
return fold_const2(ctx, op);
14
}
15
16
+static bool fold_extract(OptContext *ctx, TCGOp *op)
17
+{
18
+ if (arg_is_const(op->args[1])) {
19
+ uint64_t t;
20
+
21
+ t = arg_info(op->args[1])->val;
22
+ t = extract64(t, op->args[2], op->args[3]);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
24
+ }
25
+ return false;
26
+}
27
+
28
static bool fold_extract2(OptContext *ctx, TCGOp *op)
29
{
30
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
31
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
32
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
33
}
34
35
+static bool fold_sextract(OptContext *ctx, TCGOp *op)
36
+{
37
+ if (arg_is_const(op->args[1])) {
38
+ uint64_t t;
39
+
40
+ t = arg_info(op->args[1])->val;
41
+ t = sextract64(t, op->args[2], op->args[3]);
42
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
43
+ }
44
+ return false;
45
+}
46
+
47
static bool fold_shift(OptContext *ctx, TCGOp *op)
48
{
49
return fold_const2(ctx, op);
50
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
51
}
52
break;
53
54
- CASE_OP_32_64(extract):
55
- if (arg_is_const(op->args[1])) {
56
- tmp = extract64(arg_info(op->args[1])->val,
57
- op->args[2], op->args[3]);
58
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
59
- continue;
60
- }
61
- break;
62
-
63
- CASE_OP_32_64(sextract):
64
- if (arg_is_const(op->args[1])) {
65
- tmp = sextract64(arg_info(op->args[1])->val,
66
- op->args[2], op->args[3]);
67
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
68
- continue;
69
- }
70
- break;
71
-
72
default:
73
break;
74
75
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
76
CASE_OP_32_64(eqv):
77
done = fold_eqv(&ctx, op);
78
break;
79
+ CASE_OP_32_64(extract):
80
+ done = fold_extract(&ctx, op);
81
+ break;
82
CASE_OP_32_64(extract2):
83
done = fold_extract2(&ctx, op);
84
break;
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
case INDEX_op_setcond2_i32:
87
done = fold_setcond2(&ctx, op);
88
break;
89
+ CASE_OP_32_64(sextract):
90
+ done = fold_sextract(&ctx, op);
91
+ break;
92
CASE_OP_32_64_VEC(sub):
93
done = fold_sub(&ctx, op);
94
break;
95
--
96
2.25.1
97
98
Deleted patch
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tcg/optimize.c | 25 +++++++++++++++----------
6
1 file changed, 15 insertions(+), 10 deletions(-)
7
1
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
13
return fold_const1(ctx, op);
14
}
15
16
+static bool fold_deposit(OptContext *ctx, TCGOp *op)
17
+{
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
19
+ uint64_t t1 = arg_info(op->args[1])->val;
20
+ uint64_t t2 = arg_info(op->args[2])->val;
21
+
22
+ t1 = deposit64(t1, op->args[3], op->args[4], t2);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
24
+ }
25
+ return false;
26
+}
27
+
28
static bool fold_divide(OptContext *ctx, TCGOp *op)
29
{
30
return fold_const2(ctx, op);
31
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
32
}
33
break;
34
35
- CASE_OP_32_64(deposit):
36
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
37
- tmp = deposit64(arg_info(op->args[1])->val,
38
- op->args[3], op->args[4],
39
- arg_info(op->args[2])->val);
40
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
41
- continue;
42
- }
43
- break;
44
-
45
default:
46
break;
47
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
CASE_OP_32_64(ctpop):
50
done = fold_ctpop(&ctx, op);
51
break;
52
+ CASE_OP_32_64(deposit):
53
+ done = fold_deposit(&ctx, op);
54
+ break;
55
CASE_OP_32_64(div):
56
CASE_OP_32_64(divu):
57
done = fold_divide(&ctx, op);
58
--
59
2.25.1
60
61
Deleted patch
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tcg/optimize.c | 32 ++++++++++++++++++--------------
6
1 file changed, 18 insertions(+), 14 deletions(-)
7
1
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
13
return true;
14
}
15
16
+static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
17
+{
18
+ if (arg_is_const(op->args[1])) {
19
+ uint64_t t = arg_info(op->args[1])->val;
20
+
21
+ if (t != 0) {
22
+ t = do_constant_folding(op->opc, t, 0);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
24
+ }
25
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
26
+ }
27
+ return false;
28
+}
29
+
30
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
31
{
32
return fold_const1(ctx, op);
33
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
34
}
35
break;
36
37
- CASE_OP_32_64(clz):
38
- CASE_OP_32_64(ctz):
39
- if (arg_is_const(op->args[1])) {
40
- TCGArg v = arg_info(op->args[1])->val;
41
- if (v != 0) {
42
- tmp = do_constant_folding(opc, v, 0);
43
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
44
- } else {
45
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
46
- }
47
- continue;
48
- }
49
- break;
50
-
51
default:
52
break;
53
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
55
case INDEX_op_brcond2_i32:
56
done = fold_brcond2(&ctx, op);
57
break;
58
+ CASE_OP_32_64(clz):
59
+ CASE_OP_32_64(ctz):
60
+ done = fold_count_zeros(&ctx, op);
61
+ break;
62
CASE_OP_32_64(ctpop):
63
done = fold_ctpop(&ctx, op);
64
break;
65
--
66
2.25.1
67
68
Deleted patch
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tcg/optimize.c | 27 ++++++++++++++++-----------
6
1 file changed, 16 insertions(+), 11 deletions(-)
7
1
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
13
return false;
14
}
15
16
+static bool fold_bswap(OptContext *ctx, TCGOp *op)
17
+{
18
+ if (arg_is_const(op->args[1])) {
19
+ uint64_t t = arg_info(op->args[1])->val;
20
+
21
+ t = do_constant_folding(op->opc, t, op->args[2]);
22
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
23
+ }
24
+ return false;
25
+}
26
+
27
static bool fold_call(OptContext *ctx, TCGOp *op)
28
{
29
TCGContext *s = ctx->tcg;
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
31
}
32
break;
33
34
- CASE_OP_32_64(bswap16):
35
- CASE_OP_32_64(bswap32):
36
- case INDEX_op_bswap64_i64:
37
- if (arg_is_const(op->args[1])) {
38
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
39
- op->args[2]);
40
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
41
- continue;
42
- }
43
- break;
44
-
45
default:
46
break;
47
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
case INDEX_op_brcond2_i32:
50
done = fold_brcond2(&ctx, op);
51
break;
52
+ CASE_OP_32_64(bswap16):
53
+ CASE_OP_32_64(bswap32):
54
+ case INDEX_op_bswap64_i64:
55
+ done = fold_bswap(&ctx, op);
56
+ break;
57
CASE_OP_32_64(clz):
58
CASE_OP_32_64(ctz):
59
done = fold_count_zeros(&ctx, op);
60
--
61
2.25.1
62
63
Deleted patch
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
tcg/optimize.c | 53 +++++++++++++++++++++++++++++---------------------
6
1 file changed, 31 insertions(+), 22 deletions(-)
7
1
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
13
return fold_const2(ctx, op);
14
}
15
16
+static bool fold_dup(OptContext *ctx, TCGOp *op)
17
+{
18
+ if (arg_is_const(op->args[1])) {
19
+ uint64_t t = arg_info(op->args[1])->val;
20
+ t = dup_const(TCGOP_VECE(op), t);
21
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
22
+ }
23
+ return false;
24
+}
25
+
26
+static bool fold_dup2(OptContext *ctx, TCGOp *op)
27
+{
28
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
29
+ uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
30
+ arg_info(op->args[2])->val);
31
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
32
+ }
33
+
34
+ if (args_are_copies(op->args[1], op->args[2])) {
35
+ op->opc = INDEX_op_dup_vec;
36
+ TCGOP_VECE(op) = MO_32;
37
+ }
38
+ return false;
39
+}
40
+
41
static bool fold_eqv(OptContext *ctx, TCGOp *op)
42
{
43
return fold_const2(ctx, op);
44
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
45
done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
46
break;
47
48
- case INDEX_op_dup_vec:
49
- if (arg_is_const(op->args[1])) {
50
- tmp = arg_info(op->args[1])->val;
51
- tmp = dup_const(TCGOP_VECE(op), tmp);
52
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
53
- continue;
54
- }
55
- break;
56
-
57
- case INDEX_op_dup2_vec:
58
- assert(TCG_TARGET_REG_BITS == 32);
59
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
60
- tcg_opt_gen_movi(&ctx, op, op->args[0],
61
- deposit64(arg_info(op->args[1])->val, 32, 32,
62
- arg_info(op->args[2])->val));
63
- continue;
64
- } else if (args_are_copies(op->args[1], op->args[2])) {
65
- op->opc = INDEX_op_dup_vec;
66
- TCGOP_VECE(op) = MO_32;
67
- }
68
- break;
69
-
70
default:
71
break;
72
73
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
74
CASE_OP_32_64(divu):
75
done = fold_divide(&ctx, op);
76
break;
77
+ case INDEX_op_dup_vec:
78
+ done = fold_dup(&ctx, op);
79
+ break;
80
+ case INDEX_op_dup2_vec:
81
+ done = fold_dup2(&ctx, op);
82
+ break;
83
CASE_OP_32_64(eqv):
84
done = fold_eqv(&ctx, op);
85
break;
86
--
87
2.25.1
88
89
Deleted patch
1
This is the final entry in the main switch that was in a
2
different form. After this, we have the option to convert
3
the switch into a function dispatch table.
4
1
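
As a rough illustration of that follow-up option (not part of this patch), a dispatch table would replace the switch with an array of fold handlers indexed by opcode. The types and opcode ids below are stand-ins, not the real TCG definitions:

    #include <stdbool.h>
    #include <stddef.h>

    typedef struct OptContext OptContext;          /* stand-in, not the real type */
    typedef struct TCGOp TCGOp;                    /* stand-in, not the real type */
    typedef bool FoldFn(OptContext *ctx, TCGOp *op);

    enum { HYP_OP_ADD, HYP_OP_SUB, HYP_NB_OPS };   /* hypothetical opcode ids */

    static bool fold_add(OptContext *ctx, TCGOp *op) { return false; }
    static bool fold_sub(OptContext *ctx, TCGOp *op) { return false; }

    /* One handler per opcode; a NULL entry means "nothing to fold". */
    static FoldFn * const fold_table[HYP_NB_OPS] = {
        [HYP_OP_ADD] = fold_add,
        [HYP_OP_SUB] = fold_sub,
    };

    static bool fold_op(OptContext *ctx, TCGOp *op, int opc)
    {
        FoldFn *fn = fold_table[opc];
        return fn ? fn(ctx, op) : false;
    }
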
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/optimize.c | 27 ++++++++++++++-------------
10
1 file changed, 14 insertions(+), 13 deletions(-)
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
17
return true;
18
}
19
20
+static bool fold_mov(OptContext *ctx, TCGOp *op)
21
+{
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
23
+}
24
+
25
static bool fold_movcond(OptContext *ctx, TCGOp *op)
26
{
27
TCGOpcode opc = op->opc;
28
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
29
break;
30
}
31
32
- /* Propagate constants through copy operations and do constant
33
- folding. Constants will be substituted to arguments by register
34
- allocator where needed and possible. Also detect copies. */
35
+ /*
36
+ * Process each opcode.
37
+ * Sorted alphabetically by opcode as much as possible.
38
+ */
39
switch (opc) {
40
- CASE_OP_32_64_VEC(mov):
41
- done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
42
- break;
43
-
44
- default:
45
- break;
46
-
47
- /* ---------------------------------------------------------- */
48
- /* Sorted alphabetically by opcode as much as possible. */
49
-
50
CASE_OP_32_64_VEC(add):
51
done = fold_add(&ctx, op);
52
break;
53
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
54
case INDEX_op_mb:
55
done = fold_mb(&ctx, op);
56
break;
57
+ CASE_OP_32_64_VEC(mov):
58
+ done = fold_mov(&ctx, op);
59
+ break;
60
CASE_OP_32_64(movcond):
61
done = fold_movcond(&ctx, op);
62
break;
63
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
64
CASE_OP_32_64_VEC(xor):
65
done = fold_xor(&ctx, op);
66
break;
67
+ default:
68
+ break;
69
}
70
71
if (!done) {
72
--
73
2.25.1
74
75
Deleted patch
1
Pull the "op r, a, a => movi r, 0" optimization into a function,
2
and use it in the outer opcode fold functions.
3
1
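
The identities being captured here are that an operand combined with itself by sub, xor or andc is always zero. A tiny standalone check of the arithmetic, for illustration only (not QEMU code):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t a = 0x1234abcd5678ef00u;

        assert((a - a) == 0);    /* sub  r, a, a => movi r, 0 */
        assert((a ^ a) == 0);    /* xor  r, a, a => movi r, 0 */
        assert((a & ~a) == 0);   /* andc r, a, a => movi r, 0 */
        return 0;
    }
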
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 41 ++++++++++++++++++++++++-----------------
9
1 file changed, 24 insertions(+), 17 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
16
return false;
17
}
18
19
+/* If the binary operation has both arguments equal, fold to @i. */
20
+static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
21
+{
22
+ if (args_are_copies(op->args[1], op->args[2])) {
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
+ }
25
+ return false;
26
+}
27
+
28
/*
29
* These outermost fold_<op> functions are sorted alphabetically.
30
*/
31
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
32
33
static bool fold_andc(OptContext *ctx, TCGOp *op)
34
{
35
- return fold_const2(ctx, op);
36
+ if (fold_const2(ctx, op) ||
37
+ fold_xx_to_i(ctx, op, 0)) {
38
+ return true;
39
+ }
40
+ return false;
41
}
42
43
static bool fold_brcond(OptContext *ctx, TCGOp *op)
44
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
45
46
static bool fold_sub(OptContext *ctx, TCGOp *op)
47
{
48
- return fold_const2(ctx, op);
49
+ if (fold_const2(ctx, op) ||
50
+ fold_xx_to_i(ctx, op, 0)) {
51
+ return true;
52
+ }
53
+ return false;
54
}
55
56
static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
57
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
58
59
static bool fold_xor(OptContext *ctx, TCGOp *op)
60
{
61
- return fold_const2(ctx, op);
62
+ if (fold_const2(ctx, op) ||
63
+ fold_xx_to_i(ctx, op, 0)) {
64
+ return true;
65
+ }
66
+ return false;
67
}
68
69
/* Propagate constants and copies, fold constant expressions. */
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
71
break;
72
}
73
74
- /* Simplify expression for "op r, a, a => movi r, 0" cases */
75
- switch (opc) {
76
- CASE_OP_32_64_VEC(andc):
77
- CASE_OP_32_64_VEC(sub):
78
- CASE_OP_32_64_VEC(xor):
79
- if (args_are_copies(op->args[1], op->args[2])) {
80
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
81
- continue;
82
- }
83
- break;
84
- default:
85
- break;
86
- }
87
-
88
/*
89
* Process each opcode.
90
* Sorted alphabetically by opcode as much as possible.
91
--
92
2.25.1
93
94
Deleted patch
1
Pull the "op r, a, a => mov r, a" optimization into a function,
2
and use it in the outer opcode fold functions.
3
1
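
Here the underlying property is the idempotence of and/or, so the result is simply a copy of the input. A minimal arithmetic check, for illustration only:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t a = 0xdeadbeefcafef00du;

        assert((a & a) == a);    /* and r, a, a => mov r, a */
        assert((a | a) == a);    /* or  r, a, a => mov r, a */
        return 0;
    }
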
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 39 ++++++++++++++++++++++++---------------
9
1 file changed, 24 insertions(+), 15 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
16
return false;
17
}
18
19
+/* If the binary operation has both arguments equal, fold to identity. */
20
+static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
21
+{
22
+ if (args_are_copies(op->args[1], op->args[2])) {
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
24
+ }
25
+ return false;
26
+}
27
+
28
/*
29
* These outermost fold_<op> functions are sorted alphabetically.
30
+ *
31
+ * The ordering of the transformations should be:
32
+ * 1) those that produce a constant
33
+ * 2) those that produce a copy
34
+ * 3) those that produce information about the result value.
35
*/
36
37
static bool fold_add(OptContext *ctx, TCGOp *op)
38
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
39
40
static bool fold_and(OptContext *ctx, TCGOp *op)
41
{
42
- return fold_const2(ctx, op);
43
+ if (fold_const2(ctx, op) ||
44
+ fold_xx_to_x(ctx, op)) {
45
+ return true;
46
+ }
47
+ return false;
48
}
49
50
static bool fold_andc(OptContext *ctx, TCGOp *op)
51
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
52
53
static bool fold_or(OptContext *ctx, TCGOp *op)
54
{
55
- return fold_const2(ctx, op);
56
+ if (fold_const2(ctx, op) ||
57
+ fold_xx_to_x(ctx, op)) {
58
+ return true;
59
+ }
60
+ return false;
61
}
62
63
static bool fold_orc(OptContext *ctx, TCGOp *op)
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
65
break;
66
}
67
68
- /* Simplify expression for "op r, a, a => mov r, a" cases */
69
- switch (opc) {
70
- CASE_OP_32_64_VEC(or):
71
- CASE_OP_32_64_VEC(and):
72
- if (args_are_copies(op->args[1], op->args[2])) {
73
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
74
- continue;
75
- }
76
- break;
77
- default:
78
- break;
79
- }
80
-
81
/*
82
* Process each opcode.
83
* Sorted alphabetically by opcode as much as possible.
84
--
85
2.25.1
86
87
Deleted patch
1
Pull the "op r, a, 0 => movi r, 0" optimization into a function,
2
and use it in the outer opcode fold functions.
3
1
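
The operations folded here all have zero as an absorbing element in their second operand. For example, as a standalone illustration (not QEMU code):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t a = 0x0123456789abcdefu;

        assert((a & 0) == 0);    /* and r, a, 0 => movi r, 0 */
        assert((a * 0) == 0);    /* mul r, a, 0 => movi r, 0 (likewise mulsh/muluh) */
        return 0;
    }
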
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 38 ++++++++++++++++++++------------------
9
1 file changed, 20 insertions(+), 18 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
16
return false;
17
}
18
19
+/* If the binary operation has second argument @i, fold to @i. */
20
+static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
21
+{
22
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
+ }
25
+ return false;
26
+}
27
+
28
/* If the binary operation has both arguments equal, fold to @i. */
29
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
30
{
31
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
32
static bool fold_and(OptContext *ctx, TCGOp *op)
33
{
34
if (fold_const2(ctx, op) ||
35
+ fold_xi_to_i(ctx, op, 0) ||
36
fold_xx_to_x(ctx, op)) {
37
return true;
38
}
39
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
40
41
static bool fold_mul(OptContext *ctx, TCGOp *op)
42
{
43
- return fold_const2(ctx, op);
44
+ if (fold_const2(ctx, op) ||
45
+ fold_xi_to_i(ctx, op, 0)) {
46
+ return true;
47
+ }
48
+ return false;
49
}
50
51
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
52
{
53
- return fold_const2(ctx, op);
54
+ if (fold_const2(ctx, op) ||
55
+ fold_xi_to_i(ctx, op, 0)) {
56
+ return true;
57
+ }
58
+ return false;
59
}
60
61
static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
63
continue;
64
}
65
66
- /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
67
- switch (opc) {
68
- CASE_OP_32_64_VEC(and):
69
- CASE_OP_32_64_VEC(mul):
70
- CASE_OP_32_64(muluh):
71
- CASE_OP_32_64(mulsh):
72
- if (arg_is_const(op->args[2])
73
- && arg_info(op->args[2])->val == 0) {
74
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
75
- continue;
76
- }
77
- break;
78
- default:
79
- break;
80
- }
81
-
82
/*
83
* Process each opcode.
84
* Sorted alphabetically by opcode as much as possible.
85
--
86
2.25.1
87
88
Deleted patch
1
Compute the type of the operation early.
2
1
3
There are at least 4 places that used a def->flags ladder
4
to determine the type of the operation being optimized.
5
6
There were two places that assumed !TCG_OPF_64BIT means
7
TCG_TYPE_I32, and so could potentially compute incorrect
8
results for vector operations.
9
10
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
13
tcg/optimize.c | 149 +++++++++++++++++++++++++++++--------------------
14
1 file changed, 89 insertions(+), 60 deletions(-)
15
16
diff --git a/tcg/optimize.c b/tcg/optimize.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/optimize.c
19
+++ b/tcg/optimize.c
20
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
21
22
/* In flight values from optimization. */
23
uint64_t z_mask;
24
+ TCGType type;
25
} OptContext;
26
27
static inline TempOptInfo *ts_info(TCGTemp *ts)
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
29
{
30
TCGTemp *dst_ts = arg_temp(dst);
31
TCGTemp *src_ts = arg_temp(src);
32
- const TCGOpDef *def;
33
TempOptInfo *di;
34
TempOptInfo *si;
35
uint64_t z_mask;
36
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
37
reset_ts(dst_ts);
38
di = ts_info(dst_ts);
39
si = ts_info(src_ts);
40
- def = &tcg_op_defs[op->opc];
41
- if (def->flags & TCG_OPF_VECTOR) {
42
- new_op = INDEX_op_mov_vec;
43
- } else if (def->flags & TCG_OPF_64BIT) {
44
- new_op = INDEX_op_mov_i64;
45
- } else {
46
+
47
+ switch (ctx->type) {
48
+ case TCG_TYPE_I32:
49
new_op = INDEX_op_mov_i32;
50
+ break;
51
+ case TCG_TYPE_I64:
52
+ new_op = INDEX_op_mov_i64;
53
+ break;
54
+ case TCG_TYPE_V64:
55
+ case TCG_TYPE_V128:
56
+ case TCG_TYPE_V256:
57
+ /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
58
+ new_op = INDEX_op_mov_vec;
59
+ break;
60
+ default:
61
+ g_assert_not_reached();
62
}
63
op->opc = new_op;
64
- /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
65
op->args[0] = dst;
66
op->args[1] = src;
67
68
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
69
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
70
TCGArg dst, uint64_t val)
71
{
72
- const TCGOpDef *def = &tcg_op_defs[op->opc];
73
- TCGType type;
74
- TCGTemp *tv;
75
-
76
- if (def->flags & TCG_OPF_VECTOR) {
77
- type = TCGOP_VECL(op) + TCG_TYPE_V64;
78
- } else if (def->flags & TCG_OPF_64BIT) {
79
- type = TCG_TYPE_I64;
80
- } else {
81
- type = TCG_TYPE_I32;
82
- }
83
-
84
/* Convert movi to mov with constant temp. */
85
- tv = tcg_constant_internal(type, val);
86
+ TCGTemp *tv = tcg_constant_internal(ctx->type, val);
87
+
88
init_ts_info(ctx, tv);
89
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
90
}
91
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
92
}
93
}
94
95
-static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
96
+static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
97
+ uint64_t x, uint64_t y)
98
{
99
- const TCGOpDef *def = &tcg_op_defs[op];
100
uint64_t res = do_constant_folding_2(op, x, y);
101
- if (!(def->flags & TCG_OPF_64BIT)) {
102
+ if (type == TCG_TYPE_I32) {
103
res = (int32_t)res;
104
}
105
return res;
106
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
107
* Return -1 if the condition can't be simplified,
108
* and the result of the condition (0 or 1) if it can.
109
*/
110
-static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
111
+static int do_constant_folding_cond(TCGType type, TCGArg x,
112
TCGArg y, TCGCond c)
113
{
114
uint64_t xv = arg_info(x)->val;
115
uint64_t yv = arg_info(y)->val;
116
117
if (arg_is_const(x) && arg_is_const(y)) {
118
- const TCGOpDef *def = &tcg_op_defs[op];
119
- tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
120
- if (def->flags & TCG_OPF_64BIT) {
121
- return do_constant_folding_cond_64(xv, yv, c);
122
- } else {
123
+ switch (type) {
124
+ case TCG_TYPE_I32:
125
return do_constant_folding_cond_32(xv, yv, c);
126
+ case TCG_TYPE_I64:
127
+ return do_constant_folding_cond_64(xv, yv, c);
128
+ default:
129
+ /* Only scalar comparisons are optimizable */
130
+ return -1;
131
}
132
} else if (args_are_copies(x, y)) {
133
return do_constant_folding_cond_eq(c);
134
@@ -XXX,XX +XXX,XX @@ static bool fold_const1(OptContext *ctx, TCGOp *op)
135
uint64_t t;
136
137
t = arg_info(op->args[1])->val;
138
- t = do_constant_folding(op->opc, t, 0);
139
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
140
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
141
}
142
return false;
143
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
144
uint64_t t1 = arg_info(op->args[1])->val;
145
uint64_t t2 = arg_info(op->args[2])->val;
146
147
- t1 = do_constant_folding(op->opc, t1, t2);
148
+ t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
149
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
150
}
151
return false;
152
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
153
static bool fold_brcond(OptContext *ctx, TCGOp *op)
154
{
155
TCGCond cond = op->args[2];
156
- int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
157
+ int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
158
159
if (i == 0) {
160
tcg_op_remove(ctx->tcg, op);
161
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
162
* Simplify EQ/NE comparisons where one of the pairs
163
* can be simplified.
164
*/
165
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
166
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
167
op->args[2], cond);
168
switch (i ^ inv) {
169
case 0:
170
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
171
goto do_brcond_high;
172
}
173
174
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
175
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
176
op->args[3], cond);
177
switch (i ^ inv) {
178
case 0:
179
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
180
if (arg_is_const(op->args[1])) {
181
uint64_t t = arg_info(op->args[1])->val;
182
183
- t = do_constant_folding(op->opc, t, op->args[2]);
184
+ t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
185
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
186
}
187
return false;
188
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
189
uint64_t t = arg_info(op->args[1])->val;
190
191
if (t != 0) {
192
- t = do_constant_folding(op->opc, t, 0);
193
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
194
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
195
}
196
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
197
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
198
199
static bool fold_movcond(OptContext *ctx, TCGOp *op)
200
{
201
- TCGOpcode opc = op->opc;
202
TCGCond cond = op->args[5];
203
- int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
204
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
205
206
if (i >= 0) {
207
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
208
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
209
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
210
uint64_t tv = arg_info(op->args[3])->val;
211
uint64_t fv = arg_info(op->args[4])->val;
212
+ TCGOpcode opc;
213
214
- opc = (opc == INDEX_op_movcond_i32
215
- ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
216
+ switch (ctx->type) {
217
+ case TCG_TYPE_I32:
218
+ opc = INDEX_op_setcond_i32;
219
+ break;
220
+ case TCG_TYPE_I64:
221
+ opc = INDEX_op_setcond_i64;
222
+ break;
223
+ default:
224
+ g_assert_not_reached();
225
+ }
226
227
if (tv == 1 && fv == 0) {
228
op->opc = opc;
229
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
230
static bool fold_setcond(OptContext *ctx, TCGOp *op)
231
{
232
TCGCond cond = op->args[3];
233
- int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
234
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
235
236
if (i >= 0) {
237
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
238
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
239
* Simplify EQ/NE comparisons where one of the pairs
240
* can be simplified.
241
*/
242
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
243
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
244
op->args[3], cond);
245
switch (i ^ inv) {
246
case 0:
247
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
248
goto do_setcond_high;
249
}
250
251
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
252
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
253
op->args[4], cond);
254
switch (i ^ inv) {
255
case 0:
256
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
257
init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
258
copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
259
260
+ /* Pre-compute the type of the operation. */
261
+ if (def->flags & TCG_OPF_VECTOR) {
262
+ ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
263
+ } else if (def->flags & TCG_OPF_64BIT) {
264
+ ctx.type = TCG_TYPE_I64;
265
+ } else {
266
+ ctx.type = TCG_TYPE_I32;
267
+ }
268
+
269
/* For commutative operations make constant second argument */
270
switch (opc) {
271
CASE_OP_32_64_VEC(add):
272
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
273
/* Proceed with possible constant folding. */
274
break;
275
}
276
- if (opc == INDEX_op_sub_i32) {
277
+ switch (ctx.type) {
278
+ case TCG_TYPE_I32:
279
neg_op = INDEX_op_neg_i32;
280
have_neg = TCG_TARGET_HAS_neg_i32;
281
- } else if (opc == INDEX_op_sub_i64) {
282
+ break;
283
+ case TCG_TYPE_I64:
284
neg_op = INDEX_op_neg_i64;
285
have_neg = TCG_TARGET_HAS_neg_i64;
286
- } else if (TCG_TARGET_HAS_neg_vec) {
287
- TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
288
- unsigned vece = TCGOP_VECE(op);
289
- neg_op = INDEX_op_neg_vec;
290
- have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
291
- } else {
292
break;
293
+ case TCG_TYPE_V64:
294
+ case TCG_TYPE_V128:
295
+ case TCG_TYPE_V256:
296
+ neg_op = INDEX_op_neg_vec;
297
+ have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
298
+ TCGOP_VECE(op)) > 0;
299
+ break;
300
+ default:
301
+ g_assert_not_reached();
302
}
303
if (!have_neg) {
304
break;
305
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
306
TCGOpcode not_op;
307
bool have_not;
308
309
- if (def->flags & TCG_OPF_VECTOR) {
310
- not_op = INDEX_op_not_vec;
311
- have_not = TCG_TARGET_HAS_not_vec;
312
- } else if (def->flags & TCG_OPF_64BIT) {
313
- not_op = INDEX_op_not_i64;
314
- have_not = TCG_TARGET_HAS_not_i64;
315
- } else {
316
+ switch (ctx.type) {
317
+ case TCG_TYPE_I32:
318
not_op = INDEX_op_not_i32;
319
have_not = TCG_TARGET_HAS_not_i32;
320
+ break;
321
+ case TCG_TYPE_I64:
322
+ not_op = INDEX_op_not_i64;
323
+ have_not = TCG_TARGET_HAS_not_i64;
324
+ break;
325
+ case TCG_TYPE_V64:
326
+ case TCG_TYPE_V128:
327
+ case TCG_TYPE_V256:
328
+ not_op = INDEX_op_not_vec;
329
+ have_not = TCG_TARGET_HAS_not_vec;
330
+ break;
331
+ default:
332
+ g_assert_not_reached();
333
}
334
if (!have_not) {
335
break;
336
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
337
below, we can ignore high bits, but for further optimizations we
338
need to record that the high bits contain garbage. */
339
partmask = z_mask;
340
- if (!(def->flags & TCG_OPF_64BIT)) {
341
+ if (ctx.type == TCG_TYPE_I32) {
342
z_mask |= ~(tcg_target_ulong)0xffffffffu;
343
partmask &= 0xffffffffu;
344
affected &= 0xffffffffu;
345
--
346
2.25.1
347
348
Deleted patch
1
Split out the conditional conversion from a more complex logical
2
operation to a simple NOT. Create a couple more helpers to make
3
this easy for the outer-most logical operations.
4
1
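
The conversions below rest on identities where an all-ones or all-zeros constant collapses the operation to a plain complement of the other operand. A compact standalone check, for illustration only:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t a = 0x00ff00ff12345678u;

        assert((a ^ ~0ull) == ~a);      /* xor  r, a, -1 => not r, a */
        assert((~0ull & ~a) == ~a);     /* andc r, -1, a => not r, a */
        assert((0ull | ~a) == ~a);      /* orc  r, 0, a  => not r, a */
        assert(~(a & ~0ull) == ~a);     /* nand r, a, -1 => not r, a */
        assert(~(a | 0ull) == ~a);      /* nor  r, a, 0  => not r, a */
        assert(~(a ^ 0ull) == ~a);      /* eqv  r, a, 0  => not r, a */
        return 0;
    }
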
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 158 +++++++++++++++++++++++++++----------------------
9
1 file changed, 86 insertions(+), 72 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
16
return false;
17
}
18
19
+/*
20
+ * Convert @op to NOT, if NOT is supported by the host.
21
+ * Return true if the conversion is successful, which will still
22
+ * indicate that the processing is complete.
23
+ */
24
+static bool fold_not(OptContext *ctx, TCGOp *op);
25
+static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
26
+{
27
+ TCGOpcode not_op;
28
+ bool have_not;
29
+
30
+ switch (ctx->type) {
31
+ case TCG_TYPE_I32:
32
+ not_op = INDEX_op_not_i32;
33
+ have_not = TCG_TARGET_HAS_not_i32;
34
+ break;
35
+ case TCG_TYPE_I64:
36
+ not_op = INDEX_op_not_i64;
37
+ have_not = TCG_TARGET_HAS_not_i64;
38
+ break;
39
+ case TCG_TYPE_V64:
40
+ case TCG_TYPE_V128:
41
+ case TCG_TYPE_V256:
42
+ not_op = INDEX_op_not_vec;
43
+ have_not = TCG_TARGET_HAS_not_vec;
44
+ break;
45
+ default:
46
+ g_assert_not_reached();
47
+ }
48
+ if (have_not) {
49
+ op->opc = not_op;
50
+ op->args[1] = op->args[idx];
51
+ return fold_not(ctx, op);
52
+ }
53
+ return false;
54
+}
55
+
56
+/* If the binary operation has first argument @i, fold to NOT. */
57
+static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
58
+{
59
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
60
+ return fold_to_not(ctx, op, 2);
61
+ }
62
+ return false;
63
+}
64
+
65
/* If the binary operation has second argument @i, fold to @i. */
66
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
67
{
68
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
69
return false;
70
}
71
72
+/* If the binary operation has second argument @i, fold to NOT. */
73
+static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
74
+{
75
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
76
+ return fold_to_not(ctx, op, 1);
77
+ }
78
+ return false;
79
+}
80
+
81
/* If the binary operation has both arguments equal, fold to @i. */
82
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
83
{
84
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
85
static bool fold_andc(OptContext *ctx, TCGOp *op)
86
{
87
if (fold_const2(ctx, op) ||
88
- fold_xx_to_i(ctx, op, 0)) {
89
+ fold_xx_to_i(ctx, op, 0) ||
90
+ fold_ix_to_not(ctx, op, -1)) {
91
return true;
92
}
93
return false;
94
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
95
96
static bool fold_eqv(OptContext *ctx, TCGOp *op)
97
{
98
- return fold_const2(ctx, op);
99
+ if (fold_const2(ctx, op) ||
100
+ fold_xi_to_not(ctx, op, 0)) {
101
+ return true;
102
+ }
103
+ return false;
104
}
105
106
static bool fold_extract(OptContext *ctx, TCGOp *op)
107
@@ -XXX,XX +XXX,XX @@ static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
108
109
static bool fold_nand(OptContext *ctx, TCGOp *op)
110
{
111
- return fold_const2(ctx, op);
112
+ if (fold_const2(ctx, op) ||
113
+ fold_xi_to_not(ctx, op, -1)) {
114
+ return true;
115
+ }
116
+ return false;
117
}
118
119
static bool fold_neg(OptContext *ctx, TCGOp *op)
120
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
121
122
static bool fold_nor(OptContext *ctx, TCGOp *op)
123
{
124
- return fold_const2(ctx, op);
125
+ if (fold_const2(ctx, op) ||
126
+ fold_xi_to_not(ctx, op, 0)) {
127
+ return true;
128
+ }
129
+ return false;
130
}
131
132
static bool fold_not(OptContext *ctx, TCGOp *op)
133
{
134
- return fold_const1(ctx, op);
135
+ if (fold_const1(ctx, op)) {
136
+ return true;
137
+ }
138
+
139
+ /* Because of fold_to_not, we want to always return true, via finish. */
140
+ finish_folding(ctx, op);
141
+ return true;
142
}
143
144
static bool fold_or(OptContext *ctx, TCGOp *op)
145
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
146
147
static bool fold_orc(OptContext *ctx, TCGOp *op)
148
{
149
- return fold_const2(ctx, op);
150
+ if (fold_const2(ctx, op) ||
151
+ fold_ix_to_not(ctx, op, 0)) {
152
+ return true;
153
+ }
154
+ return false;
155
}
156
157
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
158
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
159
static bool fold_xor(OptContext *ctx, TCGOp *op)
160
{
161
if (fold_const2(ctx, op) ||
162
- fold_xx_to_i(ctx, op, 0)) {
163
+ fold_xx_to_i(ctx, op, 0) ||
164
+ fold_xi_to_not(ctx, op, -1)) {
165
return true;
166
}
167
return false;
168
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
169
}
170
}
171
break;
172
- CASE_OP_32_64_VEC(xor):
173
- CASE_OP_32_64(nand):
174
- if (!arg_is_const(op->args[1])
175
- && arg_is_const(op->args[2])
176
- && arg_info(op->args[2])->val == -1) {
177
- i = 1;
178
- goto try_not;
179
- }
180
- break;
181
- CASE_OP_32_64(nor):
182
- if (!arg_is_const(op->args[1])
183
- && arg_is_const(op->args[2])
184
- && arg_info(op->args[2])->val == 0) {
185
- i = 1;
186
- goto try_not;
187
- }
188
- break;
189
- CASE_OP_32_64_VEC(andc):
190
- if (!arg_is_const(op->args[2])
191
- && arg_is_const(op->args[1])
192
- && arg_info(op->args[1])->val == -1) {
193
- i = 2;
194
- goto try_not;
195
- }
196
- break;
197
- CASE_OP_32_64_VEC(orc):
198
- CASE_OP_32_64(eqv):
199
- if (!arg_is_const(op->args[2])
200
- && arg_is_const(op->args[1])
201
- && arg_info(op->args[1])->val == 0) {
202
- i = 2;
203
- goto try_not;
204
- }
205
- break;
206
- try_not:
207
- {
208
- TCGOpcode not_op;
209
- bool have_not;
210
-
211
- switch (ctx.type) {
212
- case TCG_TYPE_I32:
213
- not_op = INDEX_op_not_i32;
214
- have_not = TCG_TARGET_HAS_not_i32;
215
- break;
216
- case TCG_TYPE_I64:
217
- not_op = INDEX_op_not_i64;
218
- have_not = TCG_TARGET_HAS_not_i64;
219
- break;
220
- case TCG_TYPE_V64:
221
- case TCG_TYPE_V128:
222
- case TCG_TYPE_V256:
223
- not_op = INDEX_op_not_vec;
224
- have_not = TCG_TARGET_HAS_not_vec;
225
- break;
226
- default:
227
- g_assert_not_reached();
228
- }
229
- if (!have_not) {
230
- break;
231
- }
232
- op->opc = not_op;
233
- reset_temp(op->args[0]);
234
- op->args[1] = op->args[i];
235
- continue;
236
- }
237
default:
238
break;
239
}
240
--
241
2.25.1
242
243
Deleted patch
1
Even though there is only one user, place this more complex
2
conversion into its own helper.
3
1
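
The single conversion in question is the two's-complement identity 0 - b == -b, so a subtraction from zero can become a negate. A trivial standalone check, for illustration only:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t b = 0x8000000000000001u;

        assert(0 - b == -b);             /* sub r, 0, b => neg r, b */
        assert(0 - UINT64_C(0) == 0);    /* negating zero stays zero */
        return 0;
    }
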
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 89 ++++++++++++++++++++++++++------------------------
8
1 file changed, 47 insertions(+), 42 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
15
16
static bool fold_neg(OptContext *ctx, TCGOp *op)
17
{
18
- return fold_const1(ctx, op);
19
+ if (fold_const1(ctx, op)) {
20
+ return true;
21
+ }
22
+ /*
23
+ * Because of fold_sub_to_neg, we want to always return true,
24
+ * via finish_folding.
25
+ */
26
+ finish_folding(ctx, op);
27
+ return true;
28
}
29
30
static bool fold_nor(OptContext *ctx, TCGOp *op)
31
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
32
return fold_const2(ctx, op);
33
}
34
35
+static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
36
+{
37
+ TCGOpcode neg_op;
38
+ bool have_neg;
39
+
40
+ if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
41
+ return false;
42
+ }
43
+
44
+ switch (ctx->type) {
45
+ case TCG_TYPE_I32:
46
+ neg_op = INDEX_op_neg_i32;
47
+ have_neg = TCG_TARGET_HAS_neg_i32;
48
+ break;
49
+ case TCG_TYPE_I64:
50
+ neg_op = INDEX_op_neg_i64;
51
+ have_neg = TCG_TARGET_HAS_neg_i64;
52
+ break;
53
+ case TCG_TYPE_V64:
54
+ case TCG_TYPE_V128:
55
+ case TCG_TYPE_V256:
56
+ neg_op = INDEX_op_neg_vec;
57
+ have_neg = (TCG_TARGET_HAS_neg_vec &&
58
+ tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
59
+ break;
60
+ default:
61
+ g_assert_not_reached();
62
+ }
63
+ if (have_neg) {
64
+ op->opc = neg_op;
65
+ op->args[1] = op->args[2];
66
+ return fold_neg(ctx, op);
67
+ }
68
+ return false;
69
+}
70
+
71
static bool fold_sub(OptContext *ctx, TCGOp *op)
72
{
73
if (fold_const2(ctx, op) ||
74
- fold_xx_to_i(ctx, op, 0)) {
75
+ fold_xx_to_i(ctx, op, 0) ||
76
+ fold_sub_to_neg(ctx, op)) {
77
return true;
78
}
79
return false;
80
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
81
continue;
82
}
83
break;
84
- CASE_OP_32_64_VEC(sub):
85
- {
86
- TCGOpcode neg_op;
87
- bool have_neg;
88
-
89
- if (arg_is_const(op->args[2])) {
90
- /* Proceed with possible constant folding. */
91
- break;
92
- }
93
- switch (ctx.type) {
94
- case TCG_TYPE_I32:
95
- neg_op = INDEX_op_neg_i32;
96
- have_neg = TCG_TARGET_HAS_neg_i32;
97
- break;
98
- case TCG_TYPE_I64:
99
- neg_op = INDEX_op_neg_i64;
100
- have_neg = TCG_TARGET_HAS_neg_i64;
101
- break;
102
- case TCG_TYPE_V64:
103
- case TCG_TYPE_V128:
104
- case TCG_TYPE_V256:
105
- neg_op = INDEX_op_neg_vec;
106
- have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
107
- TCGOP_VECE(op)) > 0;
108
- break;
109
- default:
110
- g_assert_not_reached();
111
- }
112
- if (!have_neg) {
113
- break;
114
- }
115
- if (arg_is_const(op->args[1])
116
- && arg_info(op->args[1])->val == 0) {
117
- op->opc = neg_op;
118
- reset_temp(op->args[0]);
119
- op->args[1] = op->args[2];
120
- continue;
121
- }
122
- }
123
- break;
124
default:
125
break;
126
}
127
--
128
2.25.1
129
130
Deleted patch
1
Pull the "op r, a, i => mov r, a" optimization into a function,
2
and use it in the outer-most logical operations.
3
1
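
Each of these folds relies on an identity element in the second operand (0 for add/sub/or/xor/shift, -1 for and), so the op degenerates to a copy of the first operand. A quick standalone check, for illustration only:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t a = 0xfedcba9876543210u;

        assert((a + 0) == a);        /* add r, a, 0  => mov r, a */
        assert((a - 0) == a);        /* sub r, a, 0  => mov r, a */
        assert((a | 0) == a);        /* or  r, a, 0  => mov r, a */
        assert((a ^ 0) == a);        /* xor r, a, 0  => mov r, a */
        assert((a & ~0ull) == a);    /* and r, a, -1 => mov r, a */
        assert((a << 0) == a);       /* shl r, a, 0  => mov r, a */
        return 0;
    }
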
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 61 +++++++++++++++++++++-----------------------------
8
1 file changed, 26 insertions(+), 35 deletions(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
15
return false;
16
}
17
18
+/* If the binary operation has second argument @i, fold to identity. */
19
+static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
20
+{
21
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
23
+ }
24
+ return false;
25
+}
26
+
27
/* If the binary operation has second argument @i, fold to NOT. */
28
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
29
{
30
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
31
32
static bool fold_add(OptContext *ctx, TCGOp *op)
33
{
34
- return fold_const2(ctx, op);
35
+ if (fold_const2(ctx, op) ||
36
+ fold_xi_to_x(ctx, op, 0)) {
37
+ return true;
38
+ }
39
+ return false;
40
}
41
42
static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
43
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
44
{
45
if (fold_const2(ctx, op) ||
46
fold_xi_to_i(ctx, op, 0) ||
47
+ fold_xi_to_x(ctx, op, -1) ||
48
fold_xx_to_x(ctx, op)) {
49
return true;
50
}
51
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
52
{
53
if (fold_const2(ctx, op) ||
54
fold_xx_to_i(ctx, op, 0) ||
55
+ fold_xi_to_x(ctx, op, 0) ||
56
fold_ix_to_not(ctx, op, -1)) {
57
return true;
58
}
59
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
60
static bool fold_eqv(OptContext *ctx, TCGOp *op)
61
{
62
if (fold_const2(ctx, op) ||
63
+ fold_xi_to_x(ctx, op, -1) ||
64
fold_xi_to_not(ctx, op, 0)) {
65
return true;
66
}
67
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
68
static bool fold_or(OptContext *ctx, TCGOp *op)
69
{
70
if (fold_const2(ctx, op) ||
71
+ fold_xi_to_x(ctx, op, 0) ||
72
fold_xx_to_x(ctx, op)) {
73
return true;
74
}
75
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
76
static bool fold_orc(OptContext *ctx, TCGOp *op)
77
{
78
if (fold_const2(ctx, op) ||
79
+ fold_xi_to_x(ctx, op, -1) ||
80
fold_ix_to_not(ctx, op, 0)) {
81
return true;
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
84
85
static bool fold_shift(OptContext *ctx, TCGOp *op)
86
{
87
- return fold_const2(ctx, op);
88
+ if (fold_const2(ctx, op) ||
89
+ fold_xi_to_x(ctx, op, 0)) {
90
+ return true;
91
+ }
92
+ return false;
93
}
94
95
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
96
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
97
{
98
if (fold_const2(ctx, op) ||
99
fold_xx_to_i(ctx, op, 0) ||
100
+ fold_xi_to_x(ctx, op, 0) ||
101
fold_sub_to_neg(ctx, op)) {
102
return true;
103
}
104
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
105
{
106
if (fold_const2(ctx, op) ||
107
fold_xx_to_i(ctx, op, 0) ||
108
+ fold_xi_to_x(ctx, op, 0) ||
109
fold_xi_to_not(ctx, op, -1)) {
110
return true;
111
}
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
break;
114
}
115
116
- /* Simplify expression for "op r, a, const => mov r, a" cases */
117
- switch (opc) {
118
- CASE_OP_32_64_VEC(add):
119
- CASE_OP_32_64_VEC(sub):
120
- CASE_OP_32_64_VEC(or):
121
- CASE_OP_32_64_VEC(xor):
122
- CASE_OP_32_64_VEC(andc):
123
- CASE_OP_32_64(shl):
124
- CASE_OP_32_64(shr):
125
- CASE_OP_32_64(sar):
126
- CASE_OP_32_64(rotl):
127
- CASE_OP_32_64(rotr):
128
- if (!arg_is_const(op->args[1])
129
- && arg_is_const(op->args[2])
130
- && arg_info(op->args[2])->val == 0) {
131
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
132
- continue;
133
- }
134
- break;
135
- CASE_OP_32_64_VEC(and):
136
- CASE_OP_32_64_VEC(orc):
137
- CASE_OP_32_64(eqv):
138
- if (!arg_is_const(op->args[1])
139
- && arg_is_const(op->args[2])
140
- && arg_info(op->args[2])->val == -1) {
141
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
142
- continue;
143
- }
144
- break;
145
- default:
146
- break;
147
- }
148
-
149
/* Simplify using known-zero bits. Currently only ops with a single
150
output argument is supported. */
151
z_mask = -1;
152
--
153
2.25.1
154
155
Deleted patch
1
Pull the "op r, 0, b => movi r, 0" optimization into a function,
2
and use it in fold_shift.
3
1
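
For shifts and rotates a zero first operand stays zero regardless of the count, which is what the new helper checks. A small standalone check, for illustration only:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        for (unsigned c = 0; c < 64; c++) {
            assert((UINT64_C(0) << c) == 0);   /* shl r, 0, c => movi r, 0 */
            assert((UINT64_C(0) >> c) == 0);   /* shr r, 0, c => movi r, 0 */
        }
        return 0;
    }
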
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 28 ++++++++++------------------
9
1 file changed, 10 insertions(+), 18 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
16
return false;
17
}
18
19
+/* If the binary operation has first argument @i, fold to @i. */
20
+static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
21
+{
22
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
+ }
25
+ return false;
26
+}
27
+
28
/* If the binary operation has first argument @i, fold to NOT. */
29
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
30
{
31
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
32
static bool fold_shift(OptContext *ctx, TCGOp *op)
33
{
34
if (fold_const2(ctx, op) ||
35
+ fold_ix_to_i(ctx, op, 0) ||
36
fold_xi_to_x(ctx, op, 0)) {
37
return true;
38
}
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
40
break;
41
}
42
43
- /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
44
- and "sub r, 0, a => neg r, a" case. */
45
- switch (opc) {
46
- CASE_OP_32_64(shl):
47
- CASE_OP_32_64(shr):
48
- CASE_OP_32_64(sar):
49
- CASE_OP_32_64(rotl):
50
- CASE_OP_32_64(rotr):
51
- if (arg_is_const(op->args[1])
52
- && arg_info(op->args[1])->val == 0) {
53
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
54
- continue;
55
- }
56
- break;
57
- default:
58
- break;
59
- }
60
-
61
/* Simplify using known-zero bits. Currently only ops with a single
62
output argument is supported. */
63
z_mask = -1;
64
--
65
2.25.1
66
67
1
The results are generally 6 bit unsigned values, though
1
Change the semantics to be the last byte of the guest va, rather
2
the count leading and trailing bits may produce any value
2
than the following byte. This avoids some overflow conditions.
3
for a zero input.
3
4
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
6
---
9
tcg/optimize.c | 3 ++-
7
include/exec/cpu-all.h | 11 ++++++++++-
10
1 file changed, 2 insertions(+), 1 deletion(-)
8
linux-user/arm/target_cpu.h | 2 +-
11
9
bsd-user/main.c | 10 +++-------
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
bsd-user/mmap.c | 4 ++--
13
index XXXXXXX..XXXXXXX 100644
11
linux-user/elfload.c | 14 +++++++-------
14
--- a/tcg/optimize.c
12
linux-user/main.c | 27 +++++++++++++--------------
15
+++ b/tcg/optimize.c
13
linux-user/mmap.c | 4 ++--
16
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
14
7 files changed, 38 insertions(+), 34 deletions(-)
17
g_assert_not_reached();
15
18
}
16
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
19
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
17
index XXXXXXX..XXXXXXX 100644
20
-
18
--- a/include/exec/cpu-all.h
21
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
19
+++ b/include/exec/cpu-all.h
22
return false;
20
@@ -XXX,XX +XXX,XX @@ static inline void tswap64s(uint64_t *s)
21
*/
22
extern uintptr_t guest_base;
23
extern bool have_guest_base;
24
+
25
+/*
26
+ * If non-zero, the guest virtual address space is a contiguous subset
27
+ * of the host virtual address space, i.e. '-R reserved_va' is in effect
28
+ * either from the command-line or by default. The value is the last
29
+ * byte of the guest address space e.g. UINT32_MAX.
30
+ *
31
+ * If zero, the host and guest virtual address spaces are intermingled.
32
+ */
33
extern unsigned long reserved_va;
34
35
/*
36
@@ -XXX,XX +XXX,XX @@ extern unsigned long reserved_va;
37
#define GUEST_ADDR_MAX_ \
38
((MIN_CONST(TARGET_VIRT_ADDR_SPACE_BITS, TARGET_ABI_BITS) <= 32) ? \
39
UINT32_MAX : ~0ul)
40
-#define GUEST_ADDR_MAX (reserved_va ? reserved_va - 1 : GUEST_ADDR_MAX_)
41
+#define GUEST_ADDR_MAX (reserved_va ? : GUEST_ADDR_MAX_)
42
43
#else
44
45
diff --git a/linux-user/arm/target_cpu.h b/linux-user/arm/target_cpu.h
46
index XXXXXXX..XXXXXXX 100644
47
--- a/linux-user/arm/target_cpu.h
48
+++ b/linux-user/arm/target_cpu.h
49
@@ -XXX,XX +XXX,XX @@ static inline unsigned long arm_max_reserved_va(CPUState *cs)
50
* the high addresses. Restrict linux-user to the
51
* cached write-back RAM in the system map.
52
*/
53
- return 0x80000000ul;
54
+ return 0x7ffffffful;
55
} else {
56
/*
57
* We need to be able to map the commpage.
58
diff --git a/bsd-user/main.c b/bsd-user/main.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/bsd-user/main.c
61
+++ b/bsd-user/main.c
62
@@ -XXX,XX +XXX,XX @@ bool have_guest_base;
63
# if HOST_LONG_BITS > TARGET_VIRT_ADDR_SPACE_BITS
64
# if TARGET_VIRT_ADDR_SPACE_BITS == 32 && \
65
(TARGET_LONG_BITS == 32 || defined(TARGET_ABI32))
66
-/*
67
- * There are a number of places where we assign reserved_va to a variable
68
- * of type abi_ulong and expect it to fit. Avoid the last page.
69
- */
70
-# define MAX_RESERVED_VA (0xfffffffful & TARGET_PAGE_MASK)
71
+# define MAX_RESERVED_VA 0xfffffffful
72
# else
73
-# define MAX_RESERVED_VA (1ul << TARGET_VIRT_ADDR_SPACE_BITS)
74
+# define MAX_RESERVED_VA ((1ul << TARGET_VIRT_ADDR_SPACE_BITS) - 1)
75
# endif
76
# else
77
# define MAX_RESERVED_VA 0
78
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
79
envlist_free(envlist);
80
81
if (reserved_va) {
82
- mmap_next_start = reserved_va;
83
+ mmap_next_start = reserved_va + 1;
84
}
85
86
{
87
diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/bsd-user/mmap.c
90
+++ b/bsd-user/mmap.c
91
@@ -XXX,XX +XXX,XX @@ static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
92
size = HOST_PAGE_ALIGN(size) + alignment;
93
end_addr = start + size;
94
if (end_addr > reserved_va) {
95
- end_addr = reserved_va;
96
+ end_addr = reserved_va + 1;
97
}
98
addr = end_addr - qemu_host_page_size;
99
100
@@ -XXX,XX +XXX,XX @@ static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
101
if (looped) {
102
return (abi_ulong)-1;
103
}
104
- end_addr = reserved_va;
105
+ end_addr = reserved_va + 1;
106
addr = end_addr - qemu_host_page_size;
107
looped = 1;
108
continue;
109
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
110
index XXXXXXX..XXXXXXX 100644
111
--- a/linux-user/elfload.c
112
+++ b/linux-user/elfload.c
113
@@ -XXX,XX +XXX,XX @@ static bool init_guest_commpage(void)
114
* has specified -R reserved_va, which would trigger an assert().
115
*/
116
if (reserved_va != 0 &&
117
- TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE >= reserved_va) {
118
+ TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE - 1 > reserved_va) {
119
error_report("Cannot allocate vsyscall page");
120
exit(EXIT_FAILURE);
121
}
122
@@ -XXX,XX +XXX,XX @@ static void pgb_have_guest_base(const char *image_name, abi_ulong guest_loaddr,
123
if (guest_hiaddr > reserved_va) {
124
error_report("%s: requires more than reserved virtual "
125
"address space (0x%" PRIx64 " > 0x%lx)",
126
- image_name, (uint64_t)guest_hiaddr + 1, reserved_va);
127
+ image_name, (uint64_t)guest_hiaddr, reserved_va);
128
exit(EXIT_FAILURE);
129
}
130
} else {
131
@@ -XXX,XX +XXX,XX @@ static void pgb_have_guest_base(const char *image_name, abi_ulong guest_loaddr,
132
if (reserved_va) {
133
guest_loaddr = (guest_base >= mmap_min_addr ? 0
134
: mmap_min_addr - guest_base);
135
- guest_hiaddr = reserved_va - 1;
136
+ guest_hiaddr = reserved_va;
137
}
138
139
/* Reserve the address space for the binary, or reserved_va. */
140
@@ -XXX,XX +XXX,XX @@ static void pgb_reserved_va(const char *image_name, abi_ulong guest_loaddr,
141
if (guest_hiaddr > reserved_va) {
142
error_report("%s: requires more than reserved virtual "
143
"address space (0x%" PRIx64 " > 0x%lx)",
144
- image_name, (uint64_t)guest_hiaddr + 1, reserved_va);
145
+ image_name, (uint64_t)guest_hiaddr, reserved_va);
146
exit(EXIT_FAILURE);
147
}
148
149
@@ -XXX,XX +XXX,XX @@ static void pgb_reserved_va(const char *image_name, abi_ulong guest_loaddr,
150
/* Reserve the memory on the host. */
151
assert(guest_base != 0);
152
test = g2h_untagged(0);
153
- addr = mmap(test, reserved_va, PROT_NONE, flags, -1, 0);
154
+ addr = mmap(test, reserved_va + 1, PROT_NONE, flags, -1, 0);
155
if (addr == MAP_FAILED || addr != test) {
156
error_report("Unable to reserve 0x%lx bytes of virtual address "
157
"space at %p (%s) for use as guest address space (check your "
158
"virtual memory ulimit setting, min_mmap_addr or reserve less "
159
- "using -R option)", reserved_va, test, strerror(errno));
160
+ "using -R option)", reserved_va + 1, test, strerror(errno));
161
exit(EXIT_FAILURE);
162
}
163
164
qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %p for %lu bytes\n",
165
- __func__, addr, reserved_va);
166
+ __func__, addr, reserved_va + 1);
23
}
167
}
24
168
25
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
169
void probe_guest_base(const char *image_name, abi_ulong guest_loaddr,
26
default:
170
diff --git a/linux-user/main.c b/linux-user/main.c
27
g_assert_not_reached();
171
index XXXXXXX..XXXXXXX 100644
28
}
172
--- a/linux-user/main.c
29
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
173
+++ b/linux-user/main.c
30
return false;
174
@@ -XXX,XX +XXX,XX @@ static const char *last_log_filename;
# if HOST_LONG_BITS > TARGET_VIRT_ADDR_SPACE_BITS
# if TARGET_VIRT_ADDR_SPACE_BITS == 32 && \
(TARGET_LONG_BITS == 32 || defined(TARGET_ABI32))
-/* There are a number of places where we assign reserved_va to a variable
- of type abi_ulong and expect it to fit. Avoid the last page. */
-# define MAX_RESERVED_VA(CPU) (0xfffffffful & TARGET_PAGE_MASK)
+# define MAX_RESERVED_VA(CPU) 0xfffffffful
# else
-# define MAX_RESERVED_VA(CPU) (1ul << TARGET_VIRT_ADDR_SPACE_BITS)
+# define MAX_RESERVED_VA(CPU) ((1ul << TARGET_VIRT_ADDR_SPACE_BITS) - 1)
# endif
# else
# define MAX_RESERVED_VA(CPU) 0
@@ -XXX,XX +XXX,XX @@ static void handle_arg_reserved_va(const char *arg)
{
char *p;
int shift = 0;
- reserved_va = strtoul(arg, &p, 0);
+ unsigned long val;
+
+ val = strtoul(arg, &p, 0);
switch (*p) {
case 'k':
case 'K':
@@ -XXX,XX +XXX,XX @@ static void handle_arg_reserved_va(const char *arg)
break;
}
if (shift) {
- unsigned long unshifted = reserved_va;
+ unsigned long unshifted = val;
p++;
- reserved_va <<= shift;
- if (reserved_va >> shift != unshifted) {
+ val <<= shift;
+ if (val >> shift != unshifted) {
fprintf(stderr, "Reserved virtual address too big\n");
exit(EXIT_FAILURE);
}
@@ -XXX,XX +XXX,XX @@ static void handle_arg_reserved_va(const char *arg)
fprintf(stderr, "Unrecognised -R size suffix '%s'\n", p);
exit(EXIT_FAILURE);
}
+ /* The representation is size - 1, with 0 remaining "default". */
+ reserved_va = val ? val - 1 : 0;
}
}

static void handle_arg_singlestep(const char *arg)
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
*/
max_reserved_va = MAX_RESERVED_VA(cpu);
if (reserved_va != 0) {
- if (reserved_va % qemu_host_page_size) {
+ if ((reserved_va + 1) % qemu_host_page_size) {
char *s = size_to_str(qemu_host_page_size);
fprintf(stderr, "Reserved virtual address not aligned mod %s\n", s);
g_free(s);
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
exit(EXIT_FAILURE);
}
} else if (HOST_LONG_BITS == 64 && TARGET_VIRT_ADDR_SPACE_BITS <= 32) {
- /*
- * reserved_va must be aligned with the host page size
- * as it is used with mmap()
- */
- reserved_va = max_reserved_va & qemu_host_page_mask;
+ /* MAX_RESERVED_VA + 1 is a large power of 2, so is aligned. */
+ reserved_va = max_reserved_va;
}

{
diff --git a/linux-user/mmap.c b/linux-user/mmap.c
index XXXXXXX..XXXXXXX 100644
--- a/linux-user/mmap.c
+++ b/linux-user/mmap.c
@@ -XXX,XX +XXX,XX @@ static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
end_addr = start + size;
if (start > reserved_va - size) {
/* Start at the top of the address space. */
- end_addr = ((reserved_va - size) & -align) + size;
+ end_addr = ((reserved_va + 1 - size) & -align) + size;
looped = true;
}

@@ -XXX,XX +XXX,XX @@ static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
return (abi_ulong)-1;
}
/* Re-start at the top of the address space. */
- addr = end_addr = ((reserved_va - size) & -align) + size;
+ addr = end_addr = ((reserved_va + 1 - size) & -align) + size;
looped = true;
} else {
prot = page_get_flags(addr);
--
2.25.1

--
2.34.1
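For readers puzzling over the recurring "reserved_va + 1" above: the series changes reserved_va from an exclusive end address to the inclusive last byte of the reservation. A minimal standalone sketch of why (illustrative C only, not QEMU code; the variable names are invented): with a 32-bit abi_ulong a full 4GiB reservation has no representable end address, but its last byte is representable, and call sites that need a byte count simply widen and add one.

    /* Illustration of the "last byte" representation adopted above. */
    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        /* Reserve the whole 32-bit guest space: 0x1'0000'0000 bytes. */
        uint32_t reserved_va = 0xffffffffu;            /* inclusive last byte */
        uint64_t map_len = (uint64_t)reserved_va + 1;  /* widen, then + 1 */

        /* The old exclusive-end form would need to store 0x100000000,
         * which does not fit in 32 bits and would truncate to 0. */
        printf("last byte 0x%" PRIx32 ", mmap length 0x%" PRIx64 "\n",
               reserved_va, map_len);
        return 0;
    }

This is the same convention behind the mmap(test, reserved_va + 1, ...) call and the (reserved_va + 1) % qemu_host_page_size alignment check in the hunks above.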
1
Move all of the known-zero optimizations into the per-opcode
1
User setting of -R reserved_va can lead to an assertion
2
functions. Use fold_masks when there is a possibility of the
2
failure in page_set_flags. Sanity check the value of
3
result being determined, and simply set ctx->z_mask otherwise.
3
reserved_va and print an error message instead. Do not
4
allocate a commpage at all for m-profile cpus.
4
5
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 545 ++++++++++++++++++++++++++-----------------------
8
linux-user/elfload.c | 37 +++++++++++++++++++++++++++----------
10
1 file changed, 294 insertions(+), 251 deletions(-)
9
1 file changed, 27 insertions(+), 10 deletions(-)
11
10
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
13
--- a/linux-user/elfload.c
15
+++ b/tcg/optimize.c
14
+++ b/linux-user/elfload.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
15
@@ -XXX,XX +XXX,XX @@ enum {
17
TCGTempSet temps_used;
16
18
17
static bool init_guest_commpage(void)
19
/* In flight values from optimization. */
18
{
20
- uint64_t z_mask;
19
- abi_ptr commpage = HI_COMMPAGE & -qemu_host_page_size;
21
+ uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
20
- void *want = g2h_untagged(commpage);
22
+ uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
21
- void *addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE,
23
TCGType type;
22
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
24
} OptContext;
23
+ ARMCPU *cpu = ARM_CPU(thread_cpu);
25
24
+ abi_ptr want = HI_COMMPAGE & TARGET_PAGE_MASK;
26
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
25
+ abi_ptr addr;
27
return false;
26
28
}
27
- if (addr == MAP_FAILED) {
29
30
+static bool fold_masks(OptContext *ctx, TCGOp *op)
31
+{
32
+ uint64_t a_mask = ctx->a_mask;
33
+ uint64_t z_mask = ctx->z_mask;
34
+
35
+ /*
28
+ /*
36
+ * 32-bit ops generate 32-bit results. For the result is zero test
29
+ * M-profile allocates maximum of 2GB address space, so can never
37
+ * below, we can ignore high bits, but for further optimizations we
30
+ * allocate the commpage. Skip it.
38
+ * need to record that the high bits contain garbage.
39
+ */
31
+ */
40
+ if (ctx->type == TCG_TYPE_I32) {
32
+ if (arm_feature(&cpu->env, ARM_FEATURE_M)) {
41
+ ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
42
+ a_mask &= MAKE_64BIT_MASK(0, 32);
43
+ z_mask &= MAKE_64BIT_MASK(0, 32);
44
+ }
45
+
46
+ if (z_mask == 0) {
47
+ return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
48
+ }
49
+ if (a_mask == 0) {
50
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
51
+ }
52
+ return false;
53
+}
54
+
55
/*
56
* Convert @op to NOT, if NOT is supported by the host.
57
* Return true f the conversion is successful, which will still
58
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
59
60
static bool fold_and(OptContext *ctx, TCGOp *op)
61
{
62
+ uint64_t z1, z2;
63
+
64
if (fold_const2(ctx, op) ||
65
fold_xi_to_i(ctx, op, 0) ||
66
fold_xi_to_x(ctx, op, -1) ||
67
fold_xx_to_x(ctx, op)) {
68
return true;
69
}
70
- return false;
71
+
72
+ z1 = arg_info(op->args[1])->z_mask;
73
+ z2 = arg_info(op->args[2])->z_mask;
74
+ ctx->z_mask = z1 & z2;
75
+
76
+ /*
77
+ * Known-zeros does not imply known-ones. Therefore unless
78
+ * arg2 is constant, we can't infer affected bits from it.
79
+ */
80
+ if (arg_is_const(op->args[2])) {
81
+ ctx->a_mask = z1 & ~z2;
82
+ }
83
+
84
+ return fold_masks(ctx, op);
85
}
86
87
static bool fold_andc(OptContext *ctx, TCGOp *op)
88
{
89
+ uint64_t z1;
90
+
91
if (fold_const2(ctx, op) ||
92
fold_xx_to_i(ctx, op, 0) ||
93
fold_xi_to_x(ctx, op, 0) ||
94
fold_ix_to_not(ctx, op, -1)) {
95
return true;
96
}
97
- return false;
98
+
99
+ z1 = arg_info(op->args[1])->z_mask;
100
+
101
+ /*
102
+ * Known-zeros does not imply known-ones. Therefore unless
103
+ * arg2 is constant, we can't infer anything from it.
104
+ */
105
+ if (arg_is_const(op->args[2])) {
106
+ uint64_t z2 = ~arg_info(op->args[2])->z_mask;
107
+ ctx->a_mask = z1 & ~z2;
108
+ z1 &= z2;
109
+ }
110
+ ctx->z_mask = z1;
111
+
112
+ return fold_masks(ctx, op);
113
}
114
115
static bool fold_brcond(OptContext *ctx, TCGOp *op)
116
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
117
118
static bool fold_bswap(OptContext *ctx, TCGOp *op)
119
{
120
+ uint64_t z_mask, sign;
121
+
122
if (arg_is_const(op->args[1])) {
123
uint64_t t = arg_info(op->args[1])->val;
124
125
t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
126
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
127
}
128
- return false;
129
+
130
+ z_mask = arg_info(op->args[1])->z_mask;
131
+ switch (op->opc) {
132
+ case INDEX_op_bswap16_i32:
133
+ case INDEX_op_bswap16_i64:
134
+ z_mask = bswap16(z_mask);
135
+ sign = INT16_MIN;
136
+ break;
137
+ case INDEX_op_bswap32_i32:
138
+ case INDEX_op_bswap32_i64:
139
+ z_mask = bswap32(z_mask);
140
+ sign = INT32_MIN;
141
+ break;
142
+ case INDEX_op_bswap64_i64:
143
+ z_mask = bswap64(z_mask);
144
+ sign = INT64_MIN;
145
+ break;
146
+ default:
147
+ g_assert_not_reached();
148
+ }
149
+
150
+ switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
151
+ case TCG_BSWAP_OZ:
152
+ break;
153
+ case TCG_BSWAP_OS:
154
+ /* If the sign bit may be 1, force all the bits above to 1. */
155
+ if (z_mask & sign) {
156
+ z_mask |= sign;
157
+ }
158
+ break;
159
+ default:
160
+ /* The high bits are undefined: force all bits above the sign to 1. */
161
+ z_mask |= sign << 1;
162
+ break;
163
+ }
164
+ ctx->z_mask = z_mask;
165
+
166
+ return fold_masks(ctx, op);
167
}
168
169
static bool fold_call(OptContext *ctx, TCGOp *op)
170
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
171
172
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
173
{
174
+ uint64_t z_mask;
175
+
176
if (arg_is_const(op->args[1])) {
177
uint64_t t = arg_info(op->args[1])->val;
178
179
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
180
}
181
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
182
}
183
+
184
+ switch (ctx->type) {
185
+ case TCG_TYPE_I32:
186
+ z_mask = 31;
187
+ break;
188
+ case TCG_TYPE_I64:
189
+ z_mask = 63;
190
+ break;
191
+ default:
192
+ g_assert_not_reached();
193
+ }
194
+ ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
195
+
196
return false;
197
}
198
199
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
200
{
201
- return fold_const1(ctx, op);
202
+ if (fold_const1(ctx, op)) {
203
+ return true;
33
+ return true;
204
+ }
34
+ }
205
+
35
+
206
+ switch (ctx->type) {
36
+ /*
207
+ case TCG_TYPE_I32:
37
+ * If reserved_va does not cover the commpage, we get an assert
208
+ ctx->z_mask = 32 | 31;
38
+ * in page_set_flags. Produce an intelligent error instead.
209
+ break;
39
+ */
210
+ case TCG_TYPE_I64:
40
+ if (reserved_va != 0 && want + TARGET_PAGE_SIZE - 1 > reserved_va) {
211
+ ctx->z_mask = 64 | 63;
41
+ error_report("Allocating guest commpage: -R 0x%" PRIx64 " too small",
212
+ break;
42
+ (uint64_t)reserved_va + 1);
213
+ default:
43
+ exit(EXIT_FAILURE);
214
+ g_assert_not_reached();
215
+ }
216
+ return false;
217
}
218
219
static bool fold_deposit(OptContext *ctx, TCGOp *op)
220
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
221
t1 = deposit64(t1, op->args[3], op->args[4], t2);
222
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
223
}
224
+
225
+ ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
226
+ op->args[3], op->args[4],
227
+ arg_info(op->args[2])->z_mask);
228
return false;
229
}
230
231
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
232
233
static bool fold_extract(OptContext *ctx, TCGOp *op)
234
{
235
+ uint64_t z_mask_old, z_mask;
236
+
237
if (arg_is_const(op->args[1])) {
238
uint64_t t;
239
240
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
241
t = extract64(t, op->args[2], op->args[3]);
242
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
243
}
244
- return false;
245
+
246
+ z_mask_old = arg_info(op->args[1])->z_mask;
247
+ z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
248
+ if (op->args[2] == 0) {
249
+ ctx->a_mask = z_mask_old ^ z_mask;
250
+ }
251
+ ctx->z_mask = z_mask;
252
+
253
+ return fold_masks(ctx, op);
254
}
255
256
static bool fold_extract2(OptContext *ctx, TCGOp *op)
257
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
258
259
static bool fold_exts(OptContext *ctx, TCGOp *op)
260
{
261
- return fold_const1(ctx, op);
262
+ uint64_t z_mask_old, z_mask, sign;
263
+ bool type_change = false;
264
+
265
+ if (fold_const1(ctx, op)) {
266
+ return true;
267
+ }
44
+ }
268
+
45
+
269
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
46
+ addr = target_mmap(want, TARGET_PAGE_SIZE, PROT_READ | PROT_WRITE,
47
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
270
+
48
+
271
+ switch (op->opc) {
49
+ if (addr == -1) {
272
+ CASE_OP_32_64(ext8s):
50
perror("Allocating guest commpage");
273
+ sign = INT8_MIN;
51
exit(EXIT_FAILURE);
274
+ z_mask = (uint8_t)z_mask;
52
}
275
+ break;
53
@@ -XXX,XX +XXX,XX @@ static bool init_guest_commpage(void)
276
+ CASE_OP_32_64(ext16s):
54
}
277
+ sign = INT16_MIN;
55
278
+ z_mask = (uint16_t)z_mask;
56
/* Set kernel helper versions; rest of page is 0. */
279
+ break;
57
- __put_user(5, (uint32_t *)g2h_untagged(0xffff0ffcu));
280
+ case INDEX_op_ext_i32_i64:
58
+ put_user_u32(5, 0xffff0ffcu);
281
+ type_change = true;
59
282
+ QEMU_FALLTHROUGH;
60
- if (mprotect(addr, qemu_host_page_size, PROT_READ)) {
283
+ case INDEX_op_ext32s_i64:
61
+ if (target_mprotect(addr, qemu_host_page_size, PROT_READ | PROT_EXEC)) {
284
+ sign = INT32_MIN;
62
perror("Protecting guest commpage");
285
+ z_mask = (uint32_t)z_mask;
63
exit(EXIT_FAILURE);
286
+ break;
64
}
287
+ default:
65
-
288
+ g_assert_not_reached();
66
- page_set_flags(commpage, commpage | ~qemu_host_page_mask,
289
+ }
67
- PAGE_READ | PAGE_EXEC | PAGE_VALID);
290
+
68
return true;
291
+ if (z_mask & sign) {
292
+ z_mask |= sign;
293
+ } else if (!type_change) {
294
+ ctx->a_mask = z_mask_old ^ z_mask;
295
+ }
296
+ ctx->z_mask = z_mask;
297
+
298
+ return fold_masks(ctx, op);
299
}
69
}
300
70
301
static bool fold_extu(OptContext *ctx, TCGOp *op)
302
{
303
- return fold_const1(ctx, op);
304
+ uint64_t z_mask_old, z_mask;
305
+ bool type_change = false;
306
+
307
+ if (fold_const1(ctx, op)) {
308
+ return true;
309
+ }
310
+
311
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
312
+
313
+ switch (op->opc) {
314
+ CASE_OP_32_64(ext8u):
315
+ z_mask = (uint8_t)z_mask;
316
+ break;
317
+ CASE_OP_32_64(ext16u):
318
+ z_mask = (uint16_t)z_mask;
319
+ break;
320
+ case INDEX_op_extrl_i64_i32:
321
+ case INDEX_op_extu_i32_i64:
322
+ type_change = true;
323
+ QEMU_FALLTHROUGH;
324
+ case INDEX_op_ext32u_i64:
325
+ z_mask = (uint32_t)z_mask;
326
+ break;
327
+ case INDEX_op_extrh_i64_i32:
328
+ type_change = true;
329
+ z_mask >>= 32;
330
+ break;
331
+ default:
332
+ g_assert_not_reached();
333
+ }
334
+
335
+ ctx->z_mask = z_mask;
336
+ if (!type_change) {
337
+ ctx->a_mask = z_mask_old ^ z_mask;
338
+ }
339
+ return fold_masks(ctx, op);
340
}
341
342
static bool fold_mb(OptContext *ctx, TCGOp *op)
343
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
344
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
345
}
346
347
+ ctx->z_mask = arg_info(op->args[3])->z_mask
348
+ | arg_info(op->args[4])->z_mask;
349
+
350
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
351
uint64_t tv = arg_info(op->args[3])->val;
352
uint64_t fv = arg_info(op->args[4])->val;
353
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
354
355
static bool fold_neg(OptContext *ctx, TCGOp *op)
356
{
357
+ uint64_t z_mask;
358
+
359
if (fold_const1(ctx, op)) {
360
return true;
361
}
362
+
363
+ /* Set to 1 all bits to the left of the rightmost. */
364
+ z_mask = arg_info(op->args[1])->z_mask;
365
+ ctx->z_mask = -(z_mask & -z_mask);
366
+
367
/*
368
* Because of fold_sub_to_neg, we want to always return true,
369
* via finish_folding.
370
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
371
fold_xx_to_x(ctx, op)) {
372
return true;
373
}
374
- return false;
375
+
376
+ ctx->z_mask = arg_info(op->args[1])->z_mask
377
+ | arg_info(op->args[2])->z_mask;
378
+ return fold_masks(ctx, op);
379
}
380
381
static bool fold_orc(OptContext *ctx, TCGOp *op)
382
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
383
384
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
385
{
386
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
387
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
388
+ MemOp mop = get_memop(oi);
389
+ int width = 8 * memop_size(mop);
390
+
391
+ if (!(mop & MO_SIGN) && width < 64) {
392
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
393
+ }
394
+
395
/* Opcodes that touch guest memory stop the mb optimization. */
396
ctx->prev_mb = NULL;
397
return false;
398
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
399
if (i >= 0) {
400
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
401
}
402
+
403
+ ctx->z_mask = 1;
404
return false;
405
}
406
407
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
408
op->opc = INDEX_op_setcond_i32;
409
break;
410
}
411
+
412
+ ctx->z_mask = 1;
413
return false;
414
415
do_setcond_const:
416
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
417
418
static bool fold_sextract(OptContext *ctx, TCGOp *op)
419
{
420
+ int64_t z_mask_old, z_mask;
421
+
422
if (arg_is_const(op->args[1])) {
423
uint64_t t;
424
425
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
426
t = sextract64(t, op->args[2], op->args[3]);
427
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
428
}
429
- return false;
430
+
431
+ z_mask_old = arg_info(op->args[1])->z_mask;
432
+ z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
433
+ if (op->args[2] == 0 && z_mask >= 0) {
434
+ ctx->a_mask = z_mask_old ^ z_mask;
435
+ }
436
+ ctx->z_mask = z_mask;
437
+
438
+ return fold_masks(ctx, op);
439
}
440
441
static bool fold_shift(OptContext *ctx, TCGOp *op)
442
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
443
fold_xi_to_x(ctx, op, 0)) {
444
return true;
445
}
446
+
447
+ if (arg_is_const(op->args[2])) {
448
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type,
449
+ arg_info(op->args[1])->z_mask,
450
+ arg_info(op->args[2])->val);
451
+ return fold_masks(ctx, op);
452
+ }
453
return false;
454
}
455
456
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
457
return fold_addsub2_i32(ctx, op, false);
458
}
459
460
+static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
461
+{
462
+ /* We can't do any folding with a load, but we can record bits. */
463
+ switch (op->opc) {
464
+ CASE_OP_32_64(ld8u):
465
+ ctx->z_mask = MAKE_64BIT_MASK(0, 8);
466
+ break;
467
+ CASE_OP_32_64(ld16u):
468
+ ctx->z_mask = MAKE_64BIT_MASK(0, 16);
469
+ break;
470
+ case INDEX_op_ld32u_i64:
471
+ ctx->z_mask = MAKE_64BIT_MASK(0, 32);
472
+ break;
473
+ default:
474
+ g_assert_not_reached();
475
+ }
476
+ return false;
477
+}
478
+
479
static bool fold_xor(OptContext *ctx, TCGOp *op)
480
{
481
if (fold_const2(ctx, op) ||
482
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
483
fold_xi_to_not(ctx, op, -1)) {
484
return true;
485
}
486
- return false;
487
+
488
+ ctx->z_mask = arg_info(op->args[1])->z_mask
489
+ | arg_info(op->args[2])->z_mask;
490
+ return fold_masks(ctx, op);
491
}
492
493
/* Propagate constants and copies, fold constant expressions. */
494
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
495
}
496
497
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
498
- uint64_t z_mask, partmask, affected, tmp;
499
TCGOpcode opc = op->opc;
500
const TCGOpDef *def;
501
bool done = false;
502
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
503
break;
504
}
505
506
- /* Simplify using known-zero bits. Currently only ops with a single
507
- output argument is supported. */
508
- z_mask = -1;
509
- affected = -1;
510
- switch (opc) {
511
- CASE_OP_32_64(ext8s):
512
- if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
513
- break;
514
- }
515
- QEMU_FALLTHROUGH;
516
- CASE_OP_32_64(ext8u):
517
- z_mask = 0xff;
518
- goto and_const;
519
- CASE_OP_32_64(ext16s):
520
- if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
521
- break;
522
- }
523
- QEMU_FALLTHROUGH;
524
- CASE_OP_32_64(ext16u):
525
- z_mask = 0xffff;
526
- goto and_const;
527
- case INDEX_op_ext32s_i64:
528
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
529
- break;
530
- }
531
- QEMU_FALLTHROUGH;
532
- case INDEX_op_ext32u_i64:
533
- z_mask = 0xffffffffU;
534
- goto and_const;
535
-
536
- CASE_OP_32_64(and):
537
- z_mask = arg_info(op->args[2])->z_mask;
538
- if (arg_is_const(op->args[2])) {
539
- and_const:
540
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
541
- }
542
- z_mask = arg_info(op->args[1])->z_mask & z_mask;
543
- break;
544
-
545
- case INDEX_op_ext_i32_i64:
546
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
547
- break;
548
- }
549
- QEMU_FALLTHROUGH;
550
- case INDEX_op_extu_i32_i64:
551
- /* We do not compute affected as it is a size changing op. */
552
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
553
- break;
554
-
555
- CASE_OP_32_64(andc):
556
- /* Known-zeros does not imply known-ones. Therefore unless
557
- op->args[2] is constant, we can't infer anything from it. */
558
- if (arg_is_const(op->args[2])) {
559
- z_mask = ~arg_info(op->args[2])->z_mask;
560
- goto and_const;
561
- }
562
- /* But we certainly know nothing outside args[1] may be set. */
563
- z_mask = arg_info(op->args[1])->z_mask;
564
- break;
565
-
566
- case INDEX_op_sar_i32:
567
- if (arg_is_const(op->args[2])) {
568
- tmp = arg_info(op->args[2])->val & 31;
569
- z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
570
- }
571
- break;
572
- case INDEX_op_sar_i64:
573
- if (arg_is_const(op->args[2])) {
574
- tmp = arg_info(op->args[2])->val & 63;
575
- z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
576
- }
577
- break;
578
-
579
- case INDEX_op_shr_i32:
580
- if (arg_is_const(op->args[2])) {
581
- tmp = arg_info(op->args[2])->val & 31;
582
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
583
- }
584
- break;
585
- case INDEX_op_shr_i64:
586
- if (arg_is_const(op->args[2])) {
587
- tmp = arg_info(op->args[2])->val & 63;
588
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
589
- }
590
- break;
591
-
592
- case INDEX_op_extrl_i64_i32:
593
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
594
- break;
595
- case INDEX_op_extrh_i64_i32:
596
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
597
- break;
598
-
599
- CASE_OP_32_64(shl):
600
- if (arg_is_const(op->args[2])) {
601
- tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
602
- z_mask = arg_info(op->args[1])->z_mask << tmp;
603
- }
604
- break;
605
-
606
- CASE_OP_32_64(neg):
607
- /* Set to 1 all bits to the left of the rightmost. */
608
- z_mask = -(arg_info(op->args[1])->z_mask
609
- & -arg_info(op->args[1])->z_mask);
610
- break;
611
-
612
- CASE_OP_32_64(deposit):
613
- z_mask = deposit64(arg_info(op->args[1])->z_mask,
614
- op->args[3], op->args[4],
615
- arg_info(op->args[2])->z_mask);
616
- break;
617
-
618
- CASE_OP_32_64(extract):
619
- z_mask = extract64(arg_info(op->args[1])->z_mask,
620
- op->args[2], op->args[3]);
621
- if (op->args[2] == 0) {
622
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
623
- }
624
- break;
625
- CASE_OP_32_64(sextract):
626
- z_mask = sextract64(arg_info(op->args[1])->z_mask,
627
- op->args[2], op->args[3]);
628
- if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
629
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
630
- }
631
- break;
632
-
633
- CASE_OP_32_64(or):
634
- CASE_OP_32_64(xor):
635
- z_mask = arg_info(op->args[1])->z_mask
636
- | arg_info(op->args[2])->z_mask;
637
- break;
638
-
639
- case INDEX_op_clz_i32:
640
- case INDEX_op_ctz_i32:
641
- z_mask = arg_info(op->args[2])->z_mask | 31;
642
- break;
643
-
644
- case INDEX_op_clz_i64:
645
- case INDEX_op_ctz_i64:
646
- z_mask = arg_info(op->args[2])->z_mask | 63;
647
- break;
648
-
649
- case INDEX_op_ctpop_i32:
650
- z_mask = 32 | 31;
651
- break;
652
- case INDEX_op_ctpop_i64:
653
- z_mask = 64 | 63;
654
- break;
655
-
656
- CASE_OP_32_64(setcond):
657
- case INDEX_op_setcond2_i32:
658
- z_mask = 1;
659
- break;
660
-
661
- CASE_OP_32_64(movcond):
662
- z_mask = arg_info(op->args[3])->z_mask
663
- | arg_info(op->args[4])->z_mask;
664
- break;
665
-
666
- CASE_OP_32_64(ld8u):
667
- z_mask = 0xff;
668
- break;
669
- CASE_OP_32_64(ld16u):
670
- z_mask = 0xffff;
671
- break;
672
- case INDEX_op_ld32u_i64:
673
- z_mask = 0xffffffffu;
674
- break;
675
-
676
- CASE_OP_32_64(qemu_ld):
677
- {
678
- MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
679
- MemOp mop = get_memop(oi);
680
- if (!(mop & MO_SIGN)) {
681
- z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
682
- }
683
- }
684
- break;
685
-
686
- CASE_OP_32_64(bswap16):
687
- z_mask = arg_info(op->args[1])->z_mask;
688
- if (z_mask <= 0xffff) {
689
- op->args[2] |= TCG_BSWAP_IZ;
690
- }
691
- z_mask = bswap16(z_mask);
692
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
693
- case TCG_BSWAP_OZ:
694
- break;
695
- case TCG_BSWAP_OS:
696
- z_mask = (int16_t)z_mask;
697
- break;
698
- default: /* undefined high bits */
699
- z_mask |= MAKE_64BIT_MASK(16, 48);
700
- break;
701
- }
702
- break;
703
-
704
- case INDEX_op_bswap32_i64:
705
- z_mask = arg_info(op->args[1])->z_mask;
706
- if (z_mask <= 0xffffffffu) {
707
- op->args[2] |= TCG_BSWAP_IZ;
708
- }
709
- z_mask = bswap32(z_mask);
710
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
711
- case TCG_BSWAP_OZ:
712
- break;
713
- case TCG_BSWAP_OS:
714
- z_mask = (int32_t)z_mask;
715
- break;
716
- default: /* undefined high bits */
717
- z_mask |= MAKE_64BIT_MASK(32, 32);
718
- break;
719
- }
720
- break;
721
-
722
- default:
723
- break;
724
- }
725
-
726
- /* 32-bit ops generate 32-bit results. For the result is zero test
727
- below, we can ignore high bits, but for further optimizations we
728
- need to record that the high bits contain garbage. */
729
- partmask = z_mask;
730
- if (ctx.type == TCG_TYPE_I32) {
731
- z_mask |= ~(tcg_target_ulong)0xffffffffu;
732
- partmask &= 0xffffffffu;
733
- affected &= 0xffffffffu;
734
- }
735
- ctx.z_mask = z_mask;
736
-
737
- if (partmask == 0) {
738
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
739
- continue;
740
- }
741
- if (affected == 0) {
742
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
743
- continue;
744
- }
745
+ /* Assume all bits affected, and no bits known zero. */
746
+ ctx.a_mask = -1;
747
+ ctx.z_mask = -1;
748
749
/*
750
* Process each opcode.
751
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
752
case INDEX_op_extrh_i64_i32:
753
done = fold_extu(&ctx, op);
754
break;
755
+ CASE_OP_32_64(ld8u):
756
+ CASE_OP_32_64(ld16u):
757
+ case INDEX_op_ld32u_i64:
758
+ done = fold_tcg_ld(&ctx, op);
759
+ break;
760
case INDEX_op_mb:
761
done = fold_mb(&ctx, op);
762
break;
763
--
71
--
764
2.25.1
72
2.34.1
765
766
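As a reading aid for the a_mask/z_mask bookkeeping introduced above, here is a toy, self-contained model (struct and function names invented for illustration; this is not the TCG API): z_mask has a 0 wherever the result bit is known to be zero, a_mask has a 0 wherever the result is known to equal the first input, so a fully-zero z_mask folds to movi 0 and a fully-zero a_mask folds to a plain mov.

    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        uint64_t z_mask;   /* bit is 0 iff the result bit is known zero */
        uint64_t a_mask;   /* bit is 0 iff the result equals input 1 */
    } Masks;

    /* AND with a constant c: known zeros combine, and only the bits that
     * c clears can make the result differ from the first input. */
    static Masks fold_and_const(uint64_t z1, uint64_t c)
    {
        Masks m = { .z_mask = z1 & c, .a_mask = z1 & ~c };
        return m;
    }

    int main(void)
    {
        /* Input already known to fit in 16 bits, ANDed with 0xffff:
         * a_mask == 0, so the op can be replaced by a move. */
        Masks m = fold_and_const(0xffff, 0xffff);
        printf("z=%#llx a=%#llx -> %s\n",
               (unsigned long long)m.z_mask, (unsigned long long)m.a_mask,
               m.a_mask == 0 ? "fold to mov" : "keep op");
        return 0;
    }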
Deleted patch
1
Rename to fold_multiply2, and handle muls2_i32, mulu2_i64,
2
and muls2_i64.
3
1
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 44 +++++++++++++++++++++++++++++++++++---------
9
1 file changed, 35 insertions(+), 9 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
16
return false;
17
}
18
19
-static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
20
+static bool fold_multiply2(OptContext *ctx, TCGOp *op)
21
{
22
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
23
- uint32_t a = arg_info(op->args[2])->val;
24
- uint32_t b = arg_info(op->args[3])->val;
25
- uint64_t r = (uint64_t)a * b;
26
+ uint64_t a = arg_info(op->args[2])->val;
27
+ uint64_t b = arg_info(op->args[3])->val;
28
+ uint64_t h, l;
29
TCGArg rl, rh;
30
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
31
+ TCGOp *op2;
32
+
33
+ switch (op->opc) {
34
+ case INDEX_op_mulu2_i32:
35
+ l = (uint64_t)(uint32_t)a * (uint32_t)b;
36
+ h = (int32_t)(l >> 32);
37
+ l = (int32_t)l;
38
+ break;
39
+ case INDEX_op_muls2_i32:
40
+ l = (int64_t)(int32_t)a * (int32_t)b;
41
+ h = l >> 32;
42
+ l = (int32_t)l;
43
+ break;
44
+ case INDEX_op_mulu2_i64:
45
+ mulu64(&l, &h, a, b);
46
+ break;
47
+ case INDEX_op_muls2_i64:
48
+ muls64(&l, &h, a, b);
49
+ break;
50
+ default:
51
+ g_assert_not_reached();
52
+ }
53
54
rl = op->args[0];
55
rh = op->args[1];
56
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
57
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
58
+
59
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
60
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
61
+
62
+ tcg_opt_gen_movi(ctx, op, rl, l);
63
+ tcg_opt_gen_movi(ctx, op2, rh, h);
64
return true;
65
}
66
return false;
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
CASE_OP_32_64(muluh):
69
done = fold_mul_highpart(&ctx, op);
70
break;
71
- case INDEX_op_mulu2_i32:
72
- done = fold_mulu2_i32(&ctx, op);
73
+ CASE_OP_32_64(muls2):
74
+ CASE_OP_32_64(mulu2):
75
+ done = fold_multiply2(&ctx, op);
76
break;
77
CASE_OP_32_64(nand):
78
done = fold_nand(&ctx, op);
79
--
80
2.25.1
81
82
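If it helps to see the double-word arithmetic of fold_multiply2() in isolation, this is the whole trick for the unsigned 64-bit case, written with the GCC/Clang unsigned __int128 extension instead of QEMU's mulu64() helper (illustration only, not the patch's code):

    #include <stdint.h>
    #include <stdio.h>

    /* Split a 64x64->128 unsigned product into the two constant halves
     * that replace the outputs of a mulu2_i64 with constant inputs. */
    static void fold_mulu2_const(uint64_t a, uint64_t b,
                                 uint64_t *lo, uint64_t *hi)
    {
        unsigned __int128 p = (unsigned __int128)a * b;
        *lo = (uint64_t)p;
        *hi = (uint64_t)(p >> 64);
    }

    int main(void)
    {
        uint64_t lo, hi;
        fold_mulu2_const(0xffffffffffffffffull, 2, &lo, &hi);
        /* Prints lo=fffffffffffffffe hi=0000000000000001 */
        printf("lo=%016llx hi=%016llx\n",
               (unsigned long long)lo, (unsigned long long)hi);
        return 0;
    }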
Deleted patch
1
Rename to fold_addsub2.
2
Use Int128 to implement the wider operation.
3
1
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/optimize.c | 65 ++++++++++++++++++++++++++++++++++----------------
10
1 file changed, 44 insertions(+), 21 deletions(-)
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@
17
*/
18
19
#include "qemu/osdep.h"
20
+#include "qemu/int128.h"
21
#include "tcg/tcg-op.h"
22
#include "tcg-internal.h"
23
24
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
25
return false;
26
}
27
28
-static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
29
+static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
30
{
31
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
32
arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
33
- uint32_t al = arg_info(op->args[2])->val;
34
- uint32_t ah = arg_info(op->args[3])->val;
35
- uint32_t bl = arg_info(op->args[4])->val;
36
- uint32_t bh = arg_info(op->args[5])->val;
37
- uint64_t a = ((uint64_t)ah << 32) | al;
38
- uint64_t b = ((uint64_t)bh << 32) | bl;
39
+ uint64_t al = arg_info(op->args[2])->val;
40
+ uint64_t ah = arg_info(op->args[3])->val;
41
+ uint64_t bl = arg_info(op->args[4])->val;
42
+ uint64_t bh = arg_info(op->args[5])->val;
43
TCGArg rl, rh;
44
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
45
+ TCGOp *op2;
46
47
- if (add) {
48
- a += b;
49
+ if (ctx->type == TCG_TYPE_I32) {
50
+ uint64_t a = deposit64(al, 32, 32, ah);
51
+ uint64_t b = deposit64(bl, 32, 32, bh);
52
+
53
+ if (add) {
54
+ a += b;
55
+ } else {
56
+ a -= b;
57
+ }
58
+
59
+ al = sextract64(a, 0, 32);
60
+ ah = sextract64(a, 32, 32);
61
} else {
62
- a -= b;
63
+ Int128 a = int128_make128(al, ah);
64
+ Int128 b = int128_make128(bl, bh);
65
+
66
+ if (add) {
67
+ a = int128_add(a, b);
68
+ } else {
69
+ a = int128_sub(a, b);
70
+ }
71
+
72
+ al = int128_getlo(a);
73
+ ah = int128_gethi(a);
74
}
75
76
rl = op->args[0];
77
rh = op->args[1];
78
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
79
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
80
+
81
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
82
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
83
+
84
+ tcg_opt_gen_movi(ctx, op, rl, al);
85
+ tcg_opt_gen_movi(ctx, op2, rh, ah);
86
return true;
87
}
88
return false;
89
}
90
91
-static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
92
+static bool fold_add2(OptContext *ctx, TCGOp *op)
93
{
94
- return fold_addsub2_i32(ctx, op, true);
95
+ return fold_addsub2(ctx, op, true);
96
}
97
98
static bool fold_and(OptContext *ctx, TCGOp *op)
99
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
100
return false;
101
}
102
103
-static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
104
+static bool fold_sub2(OptContext *ctx, TCGOp *op)
105
{
106
- return fold_addsub2_i32(ctx, op, false);
107
+ return fold_addsub2(ctx, op, false);
108
}
109
110
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
CASE_OP_32_64_VEC(add):
113
done = fold_add(&ctx, op);
114
break;
115
- case INDEX_op_add2_i32:
116
- done = fold_add2_i32(&ctx, op);
117
+ CASE_OP_32_64(add2):
118
+ done = fold_add2(&ctx, op);
119
break;
120
CASE_OP_32_64_VEC(and):
121
done = fold_and(&ctx, op);
122
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
123
CASE_OP_32_64_VEC(sub):
124
done = fold_sub(&ctx, op);
125
break;
126
- case INDEX_op_sub2_i32:
127
- done = fold_sub2_i32(&ctx, op);
128
+ CASE_OP_32_64(sub2):
129
+ done = fold_sub2(&ctx, op);
130
break;
131
CASE_OP_32_64_VEC(xor):
132
done = fold_xor(&ctx, op);
133
--
134
2.25.1
135
136
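For the Int128-based fold_addsub2() above, the carry handling that int128_add() hides can be written out directly; a standalone sketch in plain C (not the QEMU Int128 API):

    #include <stdint.h>
    #include <stdio.h>

    /* Add two 128-bit values given as (lo, hi) pairs; the carry into the
     * high half is just "did the low half wrap around". */
    static void add2(uint64_t al, uint64_t ah, uint64_t bl, uint64_t bh,
                     uint64_t *rl, uint64_t *rh)
    {
        uint64_t lo = al + bl;
        uint64_t carry = lo < al;      /* unsigned wrap detection */
        *rl = lo;
        *rh = ah + bh + carry;
    }

    int main(void)
    {
        uint64_t rl, rh;
        add2(0xffffffffffffffffull, 0, 1, 0, &rl, &rh);
        printf("rl=%llu rh=%llu\n",    /* rl=0 rh=1 */
               (unsigned long long)rl, (unsigned long long)rh);
        return 0;
    }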
Deleted patch
1
This "garbage" setting pre-dates the addition of the type
2
changing opcodes INDEX_op_ext_i32_i64, INDEX_op_extu_i32_i64,
3
and INDEX_op_extr{l,h}_i64_i32.
4
1
5
So now we have a definitive points at which to adjust z_mask
6
to eliminate such bits from the 32-bit operands.
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
tcg/optimize.c | 35 ++++++++++++++++-------------------
13
1 file changed, 16 insertions(+), 19 deletions(-)
14
15
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/optimize.c
18
+++ b/tcg/optimize.c
19
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
20
ti->is_const = true;
21
ti->val = ts->val;
22
ti->z_mask = ts->val;
23
- if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
24
- /* High bits of a 32-bit quantity are garbage. */
25
- ti->z_mask |= ~0xffffffffull;
26
- }
27
} else {
28
ti->is_const = false;
29
ti->z_mask = -1;
30
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
31
TCGTemp *src_ts = arg_temp(src);
32
TempOptInfo *di;
33
TempOptInfo *si;
34
- uint64_t z_mask;
35
TCGOpcode new_op;
36
37
if (ts_are_copies(dst_ts, src_ts)) {
38
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
39
op->args[0] = dst;
40
op->args[1] = src;
41
42
- z_mask = si->z_mask;
43
- if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
44
- /* High bits of the destination are now garbage. */
45
- z_mask |= ~0xffffffffull;
46
- }
47
- di->z_mask = z_mask;
48
+ di->z_mask = si->z_mask;
49
50
if (src_ts->type == dst_ts->type) {
51
TempOptInfo *ni = ts_info(si->next_copy);
52
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
53
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
54
TCGArg dst, uint64_t val)
55
{
56
- /* Convert movi to mov with constant temp. */
57
- TCGTemp *tv = tcg_constant_internal(ctx->type, val);
58
+ TCGTemp *tv;
59
60
+ if (ctx->type == TCG_TYPE_I32) {
61
+ val = (int32_t)val;
62
+ }
63
+
64
+ /* Convert movi to mov with constant temp. */
65
+ tv = tcg_constant_internal(ctx->type, val);
66
init_ts_info(ctx, tv);
67
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
68
}
69
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
70
uint64_t z_mask = ctx->z_mask;
71
72
/*
73
- * 32-bit ops generate 32-bit results. For the result is zero test
74
- * below, we can ignore high bits, but for further optimizations we
75
- * need to record that the high bits contain garbage.
76
+ * 32-bit ops generate 32-bit results, which for the purpose of
77
+ * simplifying tcg are sign-extended. Certainly that's how we
78
+ * represent our constants elsewhere. Note that the bits will
79
+ * be reset properly for a 64-bit value when encountering the
80
+ * type changing opcodes.
81
*/
82
if (ctx->type == TCG_TYPE_I32) {
83
- ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
84
- a_mask &= MAKE_64BIT_MASK(0, 32);
85
- z_mask &= MAKE_64BIT_MASK(0, 32);
86
+ a_mask = (int32_t)a_mask;
87
+ z_mask = (int32_t)z_mask;
88
+ ctx->z_mask = z_mask;
89
}
90
91
if (z_mask == 0) {
92
--
93
2.25.1
94
95
Deleted patch
1
Recognize the constant function for or-complement.
2
1
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 1 +
9
1 file changed, 1 insertion(+)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
16
static bool fold_orc(OptContext *ctx, TCGOp *op)
17
{
18
if (fold_const2(ctx, op) ||
19
+ fold_xx_to_i(ctx, op, -1) ||
20
fold_xi_to_x(ctx, op, -1) ||
21
fold_ix_to_not(ctx, op, 0)) {
22
return true;
23
--
24
2.25.1
25
26
Deleted patch
1
Recognize the identity function for low-part multiply.
2
1
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 3 ++-
9
1 file changed, 2 insertions(+), 1 deletion(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
16
static bool fold_mul(OptContext *ctx, TCGOp *op)
17
{
18
if (fold_const2(ctx, op) ||
19
- fold_xi_to_i(ctx, op, 0)) {
20
+ fold_xi_to_i(ctx, op, 0) ||
21
+ fold_xi_to_x(ctx, op, 1)) {
22
return true;
23
}
24
return false;
25
--
26
2.25.1
27
28
Deleted patch
1
Recognize the identity function for division.
2
1
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 6 +++++-
9
1 file changed, 5 insertions(+), 1 deletion(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
16
17
static bool fold_divide(OptContext *ctx, TCGOp *op)
18
{
19
- return fold_const2(ctx, op);
20
+ if (fold_const2(ctx, op) ||
21
+ fold_xi_to_x(ctx, op, 1)) {
22
+ return true;
23
+ }
24
+ return false;
25
}
26
27
static bool fold_dup(OptContext *ctx, TCGOp *op)
28
--
29
2.25.1
30
31
Deleted patch
1
Recognize the constant function for remainder.
2
1
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
tcg/optimize.c | 6 +++++-
8
1 file changed, 5 insertions(+), 1 deletion(-)
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
15
16
static bool fold_remainder(OptContext *ctx, TCGOp *op)
17
{
18
- return fold_const2(ctx, op);
19
+ if (fold_const2(ctx, op) ||
20
+ fold_xx_to_i(ctx, op, 0)) {
21
+ return true;
22
+ }
23
+ return false;
24
}
25
26
static bool fold_setcond(OptContext *ctx, TCGOp *op)
27
--
28
2.25.1
29
30
Deleted patch
1
Sign repetitions are perforce all identical, whether they are 1 or 0.
2
Bitwise operations preserve the relative quantity of the repetitions.
3
1
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/optimize.c | 29 +++++++++++++++++++++++++++++
10
1 file changed, 29 insertions(+)
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
17
z2 = arg_info(op->args[2])->z_mask;
18
ctx->z_mask = z1 & z2;
19
20
+ /*
21
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
22
+ * Bitwise operations preserve the relative quantity of the repetitions.
23
+ */
24
+ ctx->s_mask = arg_info(op->args[1])->s_mask
25
+ & arg_info(op->args[2])->s_mask;
26
+
27
/*
28
* Known-zeros does not imply known-ones. Therefore unless
29
* arg2 is constant, we can't infer affected bits from it.
30
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
31
}
32
ctx->z_mask = z1;
33
34
+ ctx->s_mask = arg_info(op->args[1])->s_mask
35
+ & arg_info(op->args[2])->s_mask;
36
return fold_masks(ctx, op);
37
}
38
39
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
40
fold_xi_to_not(ctx, op, 0)) {
41
return true;
42
}
43
+
44
+ ctx->s_mask = arg_info(op->args[1])->s_mask
45
+ & arg_info(op->args[2])->s_mask;
46
return false;
47
}
48
49
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
50
51
ctx->z_mask = arg_info(op->args[3])->z_mask
52
| arg_info(op->args[4])->z_mask;
53
+ ctx->s_mask = arg_info(op->args[3])->s_mask
54
+ & arg_info(op->args[4])->s_mask;
55
56
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
57
uint64_t tv = arg_info(op->args[3])->val;
58
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
59
fold_xi_to_not(ctx, op, -1)) {
60
return true;
61
}
62
+
63
+ ctx->s_mask = arg_info(op->args[1])->s_mask
64
+ & arg_info(op->args[2])->s_mask;
65
return false;
66
}
67
68
@@ -XXX,XX +XXX,XX @@ static bool fold_nor(OptContext *ctx, TCGOp *op)
69
fold_xi_to_not(ctx, op, 0)) {
70
return true;
71
}
72
+
73
+ ctx->s_mask = arg_info(op->args[1])->s_mask
74
+ & arg_info(op->args[2])->s_mask;
75
return false;
76
}
77
78
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
79
return true;
80
}
81
82
+ ctx->s_mask = arg_info(op->args[1])->s_mask;
83
+
84
/* Because of fold_to_not, we want to always return true, via finish. */
85
finish_folding(ctx, op);
86
return true;
87
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
88
89
ctx->z_mask = arg_info(op->args[1])->z_mask
90
| arg_info(op->args[2])->z_mask;
91
+ ctx->s_mask = arg_info(op->args[1])->s_mask
92
+ & arg_info(op->args[2])->s_mask;
93
return fold_masks(ctx, op);
94
}
95
96
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
97
fold_ix_to_not(ctx, op, 0)) {
98
return true;
99
}
100
+
101
+ ctx->s_mask = arg_info(op->args[1])->s_mask
102
+ & arg_info(op->args[2])->s_mask;
103
return false;
104
}
105
106
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
107
108
ctx->z_mask = arg_info(op->args[1])->z_mask
109
| arg_info(op->args[2])->z_mask;
110
+ ctx->s_mask = arg_info(op->args[1])->s_mask
111
+ & arg_info(op->args[2])->s_mask;
112
return fold_masks(ctx, op);
113
}
114
115
--
116
2.25.1
117
118
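To make the claim in this last patch concrete, a small standalone check (plain C, not the TCG s_mask representation): the number of redundant copies of the sign bit in a & b, a | b or a ^ b is never smaller than the smaller of the two inputs' counts, which is why the per-argument sign masks can simply be ANDed together.

    #include <stdint.h>
    #include <stdio.h>

    /* Count how many bits below the msb are guaranteed equal to it,
     * i.e. the redundant sign-bit copies (cf. clrsb). */
    static int sign_reps(uint64_t x)
    {
        int n = 0;
        while (n < 63 && ((x >> 63) & 1) == ((x >> 62) & 1)) {
            x <<= 1;
            n++;
        }
        return n;
    }

    int main(void)
    {
        uint64_t a = 0xffffffffffff8000ull;   /* 48 sign-bit copies */
        uint64_t b = 0x0000000000001234ull;   /* 50 sign-bit copies */
        int min = sign_reps(a) < sign_reps(b) ? sign_reps(a) : sign_reps(b);

        printf("and:%d or:%d xor:%d, all >= min(a,b)=%d\n",
               sign_reps(a & b), sign_reps(a | b), sign_reps(a ^ b), min);
        return 0;
    }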