The following changes since commit c52d69e7dbaaed0ffdef8125e79218672c30161d:

Merge remote-tracking branch 'remotes/cschoenebeck/tags/pull-9p-20211027' into staging (2021-10-27 11:45:18 -0700)

are available in the Git repository at:

https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20211028

for you to fetch changes up to efd629fb21e2ff6a8f62642d9ed7a23dfee4d320:

softmmu: fix for "after access" watchpoints (2021-10-28 20:55:07 -0700)

----------------------------------------------------------------
Improvements to qemu/int128
Fixes for 128/64 division.
Cleanup tcg/optimize.c
...
host-utils: move checks out of divu128/divs128
host-utils: move udiv_qrnnd() to host-utils
host-utils: add 128-bit quotient support to divu128/divs128
host-utils: add unit tests for divu128/divs128

Pavel Dovgalyuk (3):
softmmu: fix watchpoint processing in icount mode
softmmu: remove useless condition in watchpoint check
softmmu: fix for "after access" watchpoints

Richard Henderson (52):
tcg/optimize: Rename "mask" to "z_mask"
tcg/optimize: Split out OptContext
tcg/optimize: Remove do_default label
tcg/optimize: Change tcg_opt_gen_{mov,movi} interface
tcg/optimize: Move prev_mb into OptContext
...
tcg/optimize: Split out fold_ix_to_i
tcg/optimize: Split out fold_masks
tcg/optimize: Expand fold_mulu2_i32 to all 4-arg multiplies
tcg/optimize: Expand fold_addsub2_i32 to 64-bit ops
tcg/optimize: Sink commutative operand swapping into fold functions
tcg: Extend call args using the correct opcodes
tcg/optimize: Stop forcing z_mask to "garbage" for 32-bit values
tcg/optimize: Use fold_xx_to_i for orc
tcg/optimize: Use fold_xi_to_x for mul
tcg/optimize: Use fold_xi_to_x for div
tcg/optimize: Use fold_xx_to_i for rem
...

include/fpu/softfloat-macros.h | 82 --
include/hw/clock.h | 5 +-
include/qemu/host-utils.h | 121 +-
include/qemu/int128.h | 20 +
softmmu/physmem.c | 41 +-
target/ppc/int_helper.c | 23 +-
tcg/optimize.c | 2644 ++++++++++++++++++++++++----------------
tcg/tcg.c | 6 +-
tests/unit/test-div128.c | 197 +++
util/host-utils.c | 147 ++-
tests/unit/meson.build | 1 +
11 files changed, 2075 insertions(+), 1212 deletions(-)
create mode 100644 tests/unit/test-div128.c

From: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>

Addition of not and xor on 128-bit integers.

Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
Message-Id: <20211025122818.168890-3-frederic.petrot@univ-grenoble-alpes.fr>
[rth: Split out logical operations.]
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/qemu/int128.h | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)

diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
return a;
}

+static inline Int128 int128_not(Int128 a)
+{
+ return ~a;
+}
+
static inline Int128 int128_and(Int128 a, Int128 b)
{
return a & b;
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
return a | b;
}

+static inline Int128 int128_xor(Int128 a, Int128 b)
+{
+ return a ^ b;
+}
+
static inline Int128 int128_rshift(Int128 a, int n)
{
return a >> n;
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
return int128_make128(a, (a < 0) ? -1 : 0);
}

+static inline Int128 int128_not(Int128 a)
+{
+ return int128_make128(~a.lo, ~a.hi);
+}
+
static inline Int128 int128_and(Int128 a, Int128 b)
{
return int128_make128(a.lo & b.lo, a.hi & b.hi);
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
return int128_make128(a.lo | b.lo, a.hi | b.hi);
}

+static inline Int128 int128_xor(Int128 a, Int128 b)
+{
+ return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi);
+}
+
static inline Int128 int128_rshift(Int128 a, int n)
{
int64_t h;
--
2.25.1

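[Illustration, not from the series: a minimal sketch of how a caller might
use the new helpers together with the existing Int128 constructors. The two
wrapper functions below are hypothetical; only int128_make128(), int128_not()
and int128_xor() come from the header.]

#include "qemu/osdep.h"
#include "qemu/int128.h"

/* Flip the low 64 bits of a 128-bit value, leaving the high half alone. */
static Int128 int128_flip_low(Int128 a)
{
    Int128 mask = int128_make128(UINT64_MAX, 0);    /* lo = ~0, hi = 0 */

    return int128_xor(a, mask);
}

/* Bitwise complement, working with or without CONFIG_INT128. */
static Int128 int128_complement(Int128 a)
{
    return int128_not(a);
}
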
From: Luis Pires <luis.pires@eldorado.org.br>

In preparation for changing the divu128/divs128 implementations
to allow for quotients larger than 64 bits, move the div-by-zero
and overflow checks to the callers.

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-2-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/hw/clock.h | 5 +++--
include/qemu/host-utils.h | 34 ++++++++++++---------------------
target/ppc/int_helper.c | 14 +++++++++-----
util/host-utils.c | 40 ++++++++++++++++++---------------------
4 files changed, 42 insertions(+), 51 deletions(-)

diff --git a/include/hw/clock.h b/include/hw/clock.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/clock.h
+++ b/include/hw/clock.h
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
return 0;
}
/*
- * Ignore divu128() return value as we've caught div-by-zero and don't
- * need different behaviour for overflow.
+ * BUG: when CONFIG_INT128 is not defined, the current implementation of
+ * divu128 does not return a valid truncated quotient, so the result will
+ * be wrong.
*/
divu128(&lo, &hi, clk->period);
return lo;
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
return (__int128_t)a * b / c;
}

-static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
{
- if (divisor == 0) {
- return 1;
- } else {
- __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
- __uint128_t result = dividend / divisor;
- *plow = result;
- *phigh = dividend % divisor;
- return result > UINT64_MAX;
- }
+ __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
+ __uint128_t result = dividend / divisor;
+ *plow = result;
+ *phigh = dividend % divisor;
}

-static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
{
- if (divisor == 0) {
- return 1;
- } else {
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
- __int128_t result = dividend / divisor;
- *plow = result;
- *phigh = dividend % divisor;
- return result != *plow;
- }
+ __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
+ __int128_t result = dividend / divisor;
+ *plow = result;
+ *phigh = dividend % divisor;
}
#else
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);

static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
uint64_t rt = 0;
int overflow = 0;

- overflow = divu128(&rt, &ra, rb);
-
- if (unlikely(overflow)) {
+ if (unlikely(rb == 0 || ra >= rb)) {
+ overflow = 1;
rt = 0; /* Undefined */
+ } else {
+ divu128(&rt, &ra, rb);
}

if (oe) {
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
int64_t rt = 0;
int64_t ra = (int64_t)rau;
int64_t rb = (int64_t)rbu;
- int overflow = divs128(&rt, &ra, rb);
+ int overflow = 0;

- if (unlikely(overflow)) {
+ if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
+ overflow = 1;
rt = 0; /* Undefined */
+ } else {
+ divs128(&rt, &ra, rb);
}

if (oe) {
diff --git a/util/host-utils.c b/util/host-utils.c
index XXXXXXX..XXXXXXX 100644
--- a/util/host-utils.c
+++ b/util/host-utils.c
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
*phigh = rh;
}

-/* Unsigned 128x64 division. Returns 1 if overflow (divide by zero or */
-/* quotient exceeds 64 bits). Otherwise returns quotient via plow and */
-/* remainder via phigh. */
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+/*
+ * Unsigned 128-by-64 division. Returns quotient via plow and
+ * remainder via phigh.
+ * The result must fit in 64 bits (plow) - otherwise, the result
+ * is undefined.
+ * This function will cause a division by zero if passed a zero divisor.
+ */
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
{
uint64_t dhi = *phigh;
uint64_t dlo = *plow;
unsigned i;
uint64_t carry = 0;

- if (divisor == 0) {
- return 1;
- } else if (dhi == 0) {
+ if (divisor == 0 || dhi == 0) {
*plow = dlo / divisor;
*phigh = dlo % divisor;
- return 0;
- } else if (dhi >= divisor) {
- return 1;
} else {

for (i = 0; i < 64; i++) {
@@ -XXX,XX +XXX,XX @@ int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)

*plow = dlo;
*phigh = dhi;
- return 0;
}
}

-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+/*
+ * Signed 128-by-64 division. Returns quotient via plow and
+ * remainder via phigh.
+ * The result must fit in 64 bits (plow) - otherwise, the result
+ * is undefined.
+ * This function will cause a division by zero if passed a zero divisor.
+ */
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
{
int sgn_dvdnd = *phigh < 0;
int sgn_divsr = divisor < 0;
- int overflow = 0;

if (sgn_dvdnd) {
*plow = ~(*plow);
@@ -XXX,XX +XXX,XX @@ int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
divisor = 0 - divisor;
}

- overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
+ divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);

if (sgn_dvdnd ^ sgn_divsr) {
*plow = 0 - *plow;
}
-
- if (!overflow) {
- if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
- overflow = 1;
- }
- }
-
- return overflow;
}
#endif

--
2.25.1

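[Illustration, not from the series: what the new contract means for callers.
Since divu128() no longer reports divide-by-zero or overflow, a caller that
needs a defined result has to test the operands first, as helper_divdeu()
now does. The checked_divu128() wrapper below is hypothetical.]

#include "qemu/osdep.h"
#include "qemu/host-utils.h"

/* 128-by-64 division that reports overflow instead of assuming it away. */
static bool checked_divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
{
    /* Division by zero, or a quotient that would not fit in 64 bits. */
    if (divisor == 0 || *phigh >= divisor) {
        return false;
    }
    divu128(plow, phigh, divisor);  /* quotient in *plow, remainder in *phigh */
    return true;
}
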
From: Luis Pires <luis.pires@eldorado.org.br>

Move udiv_qrnnd() from include/fpu/softfloat-macros.h to host-utils,
so it can be reused by divu128().

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-3-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat-macros.h | 82 ----------------------------------
include/qemu/host-utils.h | 81 +++++++++++++++++++++++++++++++++
2 files changed, 81 insertions(+), 82 deletions(-)

diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index XXXXXXX..XXXXXXX 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -XXX,XX +XXX,XX @@
* so some portions are provided under:
* the SoftFloat-2a license
* the BSD license
- * GPL-v2-or-later
*
* Any future contributions to this file after December 1st 2014 will be
* taken to be licensed under the Softfloat-2a license unless specifically
@@ -XXX,XX +XXX,XX @@ this code that are retained.
* THE POSSIBILITY OF SUCH DAMAGE.
*/

-/* Portions of this work are licensed under the terms of the GNU GPL,
- * version 2 or later. See the COPYING file in the top-level directory.
- */
-
#ifndef FPU_SOFTFLOAT_MACROS_H
#define FPU_SOFTFLOAT_MACROS_H

@@ -XXX,XX +XXX,XX @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)

}

-/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
- * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
- *
- * Licensed under the GPLv2/LGPLv3
- */
-static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
- uint64_t n0, uint64_t d)
-{
-#if defined(__x86_64__)
- uint64_t q;
- asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
- return q;
-#elif defined(__s390x__) && !defined(__clang__)
- /* Need to use a TImode type to get an even register pair for DLGR. */
- unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
- asm("dlgr %0, %1" : "+r"(n) : "r"(d));
- *r = n >> 64;
- return n;
-#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
- /* From Power ISA 2.06, programming note for divdeu. */
- uint64_t q1, q2, Q, r1, r2, R;
- asm("divdeu %0,%2,%4; divdu %1,%3,%4"
- : "=&r"(q1), "=r"(q2)
- : "r"(n1), "r"(n0), "r"(d));
- r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
- r2 = n0 - (q2 * d);
- Q = q1 + q2;
- R = r1 + r2;
- if (R >= d || R < r2) { /* overflow implies R > d */
- Q += 1;
- R -= d;
- }
- *r = R;
- return Q;
-#else
- uint64_t d0, d1, q0, q1, r1, r0, m;
-
- d0 = (uint32_t)d;
- d1 = d >> 32;
-
- r1 = n1 % d1;
- q1 = n1 / d1;
- m = q1 * d0;
- r1 = (r1 << 32) | (n0 >> 32);
- if (r1 < m) {
- q1 -= 1;
- r1 += d;
- if (r1 >= d) {
- if (r1 < m) {
- q1 -= 1;
- r1 += d;
- }
- }
- }
- r1 -= m;
-
- r0 = r1 % d1;
- q0 = r1 / d1;
- m = q0 * d0;
- r0 = (r0 << 32) | (uint32_t)n0;
- if (r0 < m) {
- q0 -= 1;
- r0 += d;
- if (r0 >= d) {
- if (r0 < m) {
- q0 -= 1;
- r0 += d;
- }
- }
- }
- r0 -= m;
-
- *r = r0;
- return (q1 << 32) | q0;
-#endif
-}
-
/*----------------------------------------------------------------------------
| Returns an approximation to the square root of the 32-bit significand given
| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -XXX,XX +XXX,XX @@
* THE SOFTWARE.
*/

+/* Portions of this work are licensed under the terms of the GNU GPL,
+ * version 2 or later. See the COPYING file in the top-level directory.
+ */
+
#ifndef HOST_UTILS_H
#define HOST_UTILS_H

@@ -XXX,XX +XXX,XX @@ void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
*/
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);

+/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
+ * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
+ *
+ * Licensed under the GPLv2/LGPLv3
+ */
+static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
+ uint64_t n0, uint64_t d)
+{
+#if defined(__x86_64__)
+ uint64_t q;
+ asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
+ return q;
+#elif defined(__s390x__) && !defined(__clang__)
+ /* Need to use a TImode type to get an even register pair for DLGR. */
+ unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
+ asm("dlgr %0, %1" : "+r"(n) : "r"(d));
+ *r = n >> 64;
+ return n;
+#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
+ /* From Power ISA 2.06, programming note for divdeu. */
+ uint64_t q1, q2, Q, r1, r2, R;
+ asm("divdeu %0,%2,%4; divdu %1,%3,%4"
+ : "=&r"(q1), "=r"(q2)
+ : "r"(n1), "r"(n0), "r"(d));
+ r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
+ r2 = n0 - (q2 * d);
+ Q = q1 + q2;
+ R = r1 + r2;
+ if (R >= d || R < r2) { /* overflow implies R > d */
+ Q += 1;
+ R -= d;
+ }
+ *r = R;
+ return Q;
+#else
+ uint64_t d0, d1, q0, q1, r1, r0, m;
+
+ d0 = (uint32_t)d;
+ d1 = d >> 32;
+
+ r1 = n1 % d1;
+ q1 = n1 / d1;
+ m = q1 * d0;
+ r1 = (r1 << 32) | (n0 >> 32);
+ if (r1 < m) {
+ q1 -= 1;
+ r1 += d;
+ if (r1 >= d) {
+ if (r1 < m) {
+ q1 -= 1;
+ r1 += d;
+ }
+ }
+ }
+ r1 -= m;
+
+ r0 = r1 % d1;
+ q0 = r1 / d1;
+ m = q0 * d0;
+ r0 = (r0 << 32) | (uint32_t)n0;
+ if (r0 < m) {
+ q0 -= 1;
+ r0 += d;
+ if (r0 >= d) {
+ if (r0 < m) {
+ q0 -= 1;
+ r0 += d;
+ }
+ }
+ }
+ r0 -= m;
+
+ *r = r0;
+ return (q1 << 32) | q0;
+#endif
+}
+
#endif
--
2.25.1

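[Illustration, not from the series: udiv_qrnnd() divides the 128-bit value
n1:n0 by d, returns the 64-bit quotient and stores the remainder through *r;
the caller must guarantee n1 < d so that the quotient fits in 64 bits. The
wrapper below is hypothetical.]

#include "qemu/osdep.h"
#include "qemu/host-utils.h"

static uint64_t div128_by_64(uint64_t n1, uint64_t n0, uint64_t d,
                             uint64_t *rem)
{
    g_assert(n1 < d);   /* precondition: otherwise the quotient overflows */

    return udiv_qrnnd(rem, n1, n0, d);
}
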
From: Luis Pires <luis.pires@eldorado.org.br>

These will be used to implement new decimal floating point
instructions from Power ISA 3.1.

The remainder is now returned directly by divu128/divs128,
freeing up phigh to receive the high 64 bits of the quotient.

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-4-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/hw/clock.h | 6 +-
include/qemu/host-utils.h | 20 ++++--
target/ppc/int_helper.c | 9 +--
util/host-utils.c | 133 +++++++++++++++++++++++++-------------
4 files changed, 108 insertions(+), 60 deletions(-)

diff --git a/include/hw/clock.h b/include/hw/clock.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/clock.h
+++ b/include/hw/clock.h
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
if (clk->period == 0) {
return 0;
}
- /*
- * BUG: when CONFIG_INT128 is not defined, the current implementation of
- * divu128 does not return a valid truncated quotient, so the result will
- * be wrong.
- */
+
divu128(&lo, &hi, clk->period);
return lo;
}
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
return (__int128_t)a * b / c;
}

-static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
+ uint64_t divisor)
{
__uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
__uint128_t result = dividend / divisor;
+
*plow = result;
- *phigh = dividend % divisor;
+ *phigh = result >> 64;
+ return dividend % divisor;
}

-static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
+ int64_t divisor)
{
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
+ __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
__int128_t result = dividend / divisor;
+
*plow = result;
- *phigh = dividend % divisor;
+ *phigh = result >> 64;
+ return dividend % divisor;
}
#else
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);

static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
{
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index XXXXXXX..XXXXXXX 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
- int64_t rt = 0;
+ uint64_t rt = 0;
int64_t ra = (int64_t)rau;
int64_t rb = (int64_t)rbu;
int overflow = 0;
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
int cr;
uint64_t lo_value;
uint64_t hi_value;
+ uint64_t rem;
ppc_avr_t ret = { .u64 = { 0, 0 } };

if (b->VsrSD(0) < 0) {
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
* In that case, we leave r unchanged.
*/
} else {
- divu128(&lo_value, &hi_value, 1000000000000000ULL);
+ rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);

- for (i = 1; i < 16; hi_value /= 10, i++) {
- bcd_put_digit(&ret, hi_value % 10, i);
+ for (i = 1; i < 16; rem /= 10, i++) {
+ bcd_put_digit(&ret, rem % 10, i);
}

for (; i < 32; lo_value /= 10, i++) {
diff --git a/util/host-utils.c b/util/host-utils.c
index XXXXXXX..XXXXXXX 100644
--- a/util/host-utils.c
+++ b/util/host-utils.c
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
}

/*
- * Unsigned 128-by-64 division. Returns quotient via plow and
- * remainder via phigh.
- * The result must fit in 64 bits (plow) - otherwise, the result
- * is undefined.
- * This function will cause a division by zero if passed a zero divisor.
+ * Unsigned 128-by-64 division.
+ * Returns the remainder.
+ * Returns quotient via plow and phigh.
+ * Also returns the remainder via the function return value.
*/
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
{
uint64_t dhi = *phigh;
uint64_t dlo = *plow;
- unsigned i;
- uint64_t carry = 0;
+ uint64_t rem, dhighest;
+ int sh;

if (divisor == 0 || dhi == 0) {
*plow = dlo / divisor;
- *phigh = dlo % divisor;
+ *phigh = 0;
+ return dlo % divisor;
} else {
+ sh = clz64(divisor);

- for (i = 0; i < 64; i++) {
- carry = dhi >> 63;
- dhi = (dhi << 1) | (dlo >> 63);
- if (carry || (dhi >= divisor)) {
- dhi -= divisor;
- carry = 1;
- } else {
- carry = 0;
+ if (dhi < divisor) {
+ if (sh != 0) {
+ /* normalize the divisor, shifting the dividend accordingly */
+ divisor <<= sh;
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
+ dlo <<= sh;
}
- dlo = (dlo << 1) | carry;
+
+ *phigh = 0;
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
+ } else {
+ if (sh != 0) {
+ /* normalize the divisor, shifting the dividend accordingly */
+ divisor <<= sh;
+ dhighest = dhi >> (64 - sh);
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
+ dlo <<= sh;
+
+ *phigh = udiv_qrnnd(&dhi, dhighest, dhi, divisor);
+ } else {
+ /**
+ * dhi >= divisor
+ * Since the MSB of divisor is set (sh == 0),
+ * (dhi - divisor) < divisor
+ *
+ * Thus, the high part of the quotient is 1, and we can
+ * calculate the low part with a single call to udiv_qrnnd
+ * after subtracting divisor from dhi
+ */
+ dhi -= divisor;
+ *phigh = 1;
+ }
+
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
}

- *plow = dlo;
- *phigh = dhi;
+ /*
+ * since the dividend/divisor might have been normalized,
+ * the remainder might also have to be shifted back
+ */
+ return rem >> sh;
}
}

/*
- * Signed 128-by-64 division. Returns quotient via plow and
- * remainder via phigh.
- * The result must fit in 64 bits (plow) - otherwise, the result
- * is undefined.
- * This function will cause a division by zero if passed a zero divisor.
+ * Signed 128-by-64 division.
+ * Returns quotient via plow and phigh.
+ * Also returns the remainder via the function return value.
*/
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor)
{
- int sgn_dvdnd = *phigh < 0;
- int sgn_divsr = divisor < 0;
+ bool neg_quotient = false, neg_remainder = false;
+ uint64_t unsig_hi = *phigh, unsig_lo = *plow;
+ uint64_t rem;

- if (sgn_dvdnd) {
- *plow = ~(*plow);
- *phigh = ~(*phigh);
- if (*plow == (int64_t)-1) {
+ if (*phigh < 0) {
+ neg_quotient = !neg_quotient;
+ neg_remainder = !neg_remainder;
+
+ if (unsig_lo == 0) {
+ unsig_hi = -unsig_hi;
+ } else {
+ unsig_hi = ~unsig_hi;
+ unsig_lo = -unsig_lo;
+ }
+ }
+
+ if (divisor < 0) {
+ neg_quotient = !neg_quotient;
+
+ divisor = -divisor;
+ }
+
+ rem = divu128(&unsig_lo, &unsig_hi, (uint64_t)divisor);
+
+ if (neg_quotient) {
+ if (unsig_lo == 0) {
+ *phigh = -unsig_hi;
*plow = 0;
- (*phigh)++;
- } else {
- (*plow)++;
- }
+ } else {
+ *phigh = ~unsig_hi;
+ *plow = -unsig_lo;
+ }
+ } else {
+ *phigh = unsig_hi;
+ *plow = unsig_lo;
}

- if (sgn_divsr) {
- divisor = 0 - divisor;
- }
-
- divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
-
- if (sgn_dvdnd ^ sgn_divsr) {
- *plow = 0 - *plow;
+ if (neg_remainder) {
+ return -rem;
+ } else {
+ return rem;
}
}
#endif
--
2.25.1

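[Illustration, not from the series: with this patch the dividend goes in
through *plow/*phigh, the full 128-bit quotient comes back the same way,
and the remainder is the return value, which is how helper_bcdcfsq() uses
it above. The caller below is hypothetical.]

#include "qemu/osdep.h"
#include "qemu/host-utils.h"

static uint64_t ns_to_secs(uint64_t ns_lo, uint64_t ns_hi, uint64_t *rem_ns)
{
    /* (ns_hi:ns_lo) / 10^9: quotient left in ns_hi:ns_lo, remainder returned. */
    *rem_ns = divu128(&ns_lo, &ns_hi, 1000000000ULL);

    /* This particular caller assumes the quotient fits in 64 bits. */
    g_assert(ns_hi == 0);
    return ns_lo;
}
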
From: Luis Pires <luis.pires@eldorado.org.br>

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-5-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tests/unit/test-div128.c | 197 +++++++++++++++++++++++++++++++++
tests/unit/meson.build | 1 +
2 files changed, 198 insertions(+)
create mode 100644 tests/unit/test-div128.c

diff --git a/tests/unit/test-div128.c b/tests/unit/test-div128.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/unit/test-div128.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * Test 128-bit division functions
+ *
+ * Copyright (c) 2021 Instituto de Pesquisas Eldorado (eldorado.org.br)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+
+typedef struct {
+ uint64_t high;
+ uint64_t low;
+ uint64_t rhigh;
+ uint64_t rlow;
+ uint64_t divisor;
+ uint64_t remainder;
+} test_data_unsigned;
+
+typedef struct {
+ int64_t high;
+ uint64_t low;
+ int64_t rhigh;
+ uint64_t rlow;
+ int64_t divisor;
+ int64_t remainder;
+} test_data_signed;
+
+static const test_data_unsigned test_table_unsigned[] = {
+ /* Dividend fits in 64 bits */
+ { 0x0000000000000000ULL, 0x0000000000000000ULL,
+ 0x0000000000000000ULL, 0x0000000000000000ULL,
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
+ { 0x0000000000000000ULL, 0x0000000000000001ULL,
+ 0x0000000000000000ULL, 0x0000000000000001ULL,
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
+ { 0x0000000000000000ULL, 0x0000000000000003ULL,
+ 0x0000000000000000ULL, 0x0000000000000001ULL,
+ 0x0000000000000002ULL, 0x0000000000000001ULL},
+ { 0x0000000000000000ULL, 0x8000000000000000ULL,
+ 0x0000000000000000ULL, 0x8000000000000000ULL,
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
+ { 0x0000000000000000ULL, 0xa000000000000000ULL,
+ 0x0000000000000000ULL, 0x0000000000000002ULL,
+ 0x4000000000000000ULL, 0x2000000000000000ULL},
+ { 0x0000000000000000ULL, 0x8000000000000000ULL,
+ 0x0000000000000000ULL, 0x0000000000000001ULL,
+ 0x8000000000000000ULL, 0x0000000000000000ULL},
+
+ /* Dividend > 64 bits, with MSB 0 */
+ { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
+ 0x123456789abcdefeULL, 0xefedcba987654321ULL,
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
+ { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
+ 0x0000000000000001ULL, 0x000000000000000dULL,
+ 0x123456789abcdefeULL, 0x03456789abcdf03bULL},
+ { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
+ 0x0123456789abcdefULL, 0xeefedcba98765432ULL,
+ 0x0000000000000010ULL, 0x0000000000000001ULL},
+
+ /* Dividend > 64 bits, with MSB 1 */
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+ 0x0000000000000001ULL, 0x0000000000000000ULL,
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
+ 0x0feeddccbbaa9988ULL, 0x7766554433221100ULL,
+ 0x0000000000000010ULL, 0x000000000000000fULL},
101
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
101
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
102
+ 0x000000000000000eULL, 0x00f0f0f0f0f0f35aULL,
102
+ 0x000000000000000eULL, 0x00f0f0f0f0f0f35aULL,
103
+ 0x123456789abcdefeULL, 0x0f8922bc55ef90c3ULL},
103
+ 0x123456789abcdefeULL, 0x0f8922bc55ef90c3ULL},
104
+
104
+
105
+ /**
105
+ /**
106
+ * Divisor == 64 bits, with MSB 1
106
+ * Divisor == 64 bits, with MSB 1
107
+ * and high 64 bits of dividend >= divisor
107
+ * and high 64 bits of dividend >= divisor
108
+ * (for testing normalization)
108
+ * (for testing normalization)
109
+ */
109
+ */
110
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
110
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
111
+ 0x0000000000000001ULL, 0x0000000000000000ULL,
111
+ 0x0000000000000001ULL, 0x0000000000000000ULL,
112
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
112
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
113
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
113
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
114
+ 0x0000000000000001ULL, 0xfddbb9977553310aULL,
114
+ 0x0000000000000001ULL, 0xfddbb9977553310aULL,
115
+ 0x8000000000000001ULL, 0x78899aabbccddf05ULL},
115
+ 0x8000000000000001ULL, 0x78899aabbccddf05ULL},
116
+
116
+
117
+ /* Dividend > 64 bits, divisor almost as big */
117
+ /* Dividend > 64 bits, divisor almost as big */
118
+ { 0x0000000000000001ULL, 0x23456789abcdef01ULL,
118
+ { 0x0000000000000001ULL, 0x23456789abcdef01ULL,
119
+ 0x0000000000000000ULL, 0x000000000000000fULL,
119
+ 0x0000000000000000ULL, 0x000000000000000fULL,
120
+ 0x123456789abcdefeULL, 0x123456789abcde1fULL},
120
+ 0x123456789abcdefeULL, 0x123456789abcde1fULL},
121
+};
121
+};
122
+
122
+
123
+static const test_data_signed test_table_signed[] = {
123
+static const test_data_signed test_table_signed[] = {
124
+ /* Positive dividend, positive/negative divisors */
124
+ /* Positive dividend, positive/negative divisors */
125
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
125
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
126
+ 0x0000000000000000LL, 0x0000000000bc614eULL,
126
+ 0x0000000000000000LL, 0x0000000000bc614eULL,
127
+ 0x0000000000000001LL, 0x0000000000000000LL},
127
+ 0x0000000000000001LL, 0x0000000000000000LL},
128
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
128
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
129
+ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
129
+ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
130
+ 0xffffffffffffffffLL, 0x0000000000000000LL},
130
+ 0xffffffffffffffffLL, 0x0000000000000000LL},
131
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
131
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
132
+ 0x0000000000000000LL, 0x00000000005e30a7ULL,
132
+ 0x0000000000000000LL, 0x00000000005e30a7ULL,
133
+ 0x0000000000000002LL, 0x0000000000000000LL},
133
+ 0x0000000000000002LL, 0x0000000000000000LL},
134
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
134
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
135
+ 0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
135
+ 0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
136
+ 0xfffffffffffffffeLL, 0x0000000000000000LL},
136
+ 0xfffffffffffffffeLL, 0x0000000000000000LL},
137
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
137
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
138
+ 0x0000000000000000LL, 0x0000000000178c29ULL,
138
+ 0x0000000000000000LL, 0x0000000000178c29ULL,
139
+ 0x0000000000000008LL, 0x0000000000000006LL},
139
+ 0x0000000000000008LL, 0x0000000000000006LL},
140
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
140
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
141
+ 0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
141
+ 0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
142
+ 0xfffffffffffffff8LL, 0x0000000000000006LL},
142
+ 0xfffffffffffffff8LL, 0x0000000000000006LL},
143
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
143
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
144
+ 0x0000000000000000LL, 0x000000000000550dULL,
144
+ 0x0000000000000000LL, 0x000000000000550dULL,
145
+ 0x0000000000000237LL, 0x0000000000000183LL},
145
+ 0x0000000000000237LL, 0x0000000000000183LL},
146
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
146
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
147
+ 0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
147
+ 0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
148
+ 0xfffffffffffffdc9LL, 0x0000000000000183LL},
148
+ 0xfffffffffffffdc9LL, 0x0000000000000183LL},
149
+
149
+
150
+ /* Negative dividend, positive/negative divisors */
150
+ /* Negative dividend, positive/negative divisors */
151
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
151
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
152
+ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
152
+ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
153
+ 0x0000000000000001LL, 0x0000000000000000LL},
153
+ 0x0000000000000001LL, 0x0000000000000000LL},
154
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
154
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
155
+ 0x0000000000000000LL, 0x0000000000bc614eULL,
155
+ 0x0000000000000000LL, 0x0000000000bc614eULL,
156
+ 0xffffffffffffffffLL, 0x0000000000000000LL},
156
+ 0xffffffffffffffffLL, 0x0000000000000000LL},
157
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
157
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
158
+ 0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
158
+ 0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
159
+ 0x0000000000000002LL, 0x0000000000000000LL},
159
+ 0x0000000000000002LL, 0x0000000000000000LL},
160
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
160
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
161
+ 0x0000000000000000LL, 0x00000000005e30a7ULL,
161
+ 0x0000000000000000LL, 0x00000000005e30a7ULL,
162
+ 0xfffffffffffffffeLL, 0x0000000000000000LL},
162
+ 0xfffffffffffffffeLL, 0x0000000000000000LL},
163
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
163
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
164
+ 0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
164
+ 0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
165
+ 0x0000000000000008LL, 0xfffffffffffffffaLL},
165
+ 0x0000000000000008LL, 0xfffffffffffffffaLL},
166
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
166
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
167
+ 0x0000000000000000LL, 0x0000000000178c29ULL,
167
+ 0x0000000000000000LL, 0x0000000000178c29ULL,
168
+ 0xfffffffffffffff8LL, 0xfffffffffffffffaLL},
168
+ 0xfffffffffffffff8LL, 0xfffffffffffffffaLL},
169
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
169
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
170
+ 0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
170
+ 0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
171
+ 0x0000000000000237LL, 0xfffffffffffffe7dLL},
171
+ 0x0000000000000237LL, 0xfffffffffffffe7dLL},
172
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
172
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
173
+ 0x0000000000000000LL, 0x000000000000550dULL,
173
+ 0x0000000000000000LL, 0x000000000000550dULL,
174
+ 0xfffffffffffffdc9LL, 0xfffffffffffffe7dLL},
174
+ 0xfffffffffffffdc9LL, 0xfffffffffffffe7dLL},
175
+};
175
+};
176
+
176
+
177
+static void test_divu128(void)
177
+static void test_divu128(void)
178
+{
178
+{
179
+ int i;
179
+ int i;
180
+ uint64_t rem;
180
+ uint64_t rem;
181
+ test_data_unsigned tmp;
181
+ test_data_unsigned tmp;
182
+
182
+
183
+ for (i = 0; i < ARRAY_SIZE(test_table_unsigned); ++i) {
183
+ for (i = 0; i < ARRAY_SIZE(test_table_unsigned); ++i) {
184
+ tmp = test_table_unsigned[i];
184
+ tmp = test_table_unsigned[i];
185
+
185
+
186
+ rem = divu128(&tmp.low, &tmp.high, tmp.divisor);
186
+ rem = divu128(&tmp.low, &tmp.high, tmp.divisor);
187
+ g_assert_cmpuint(tmp.low, ==, tmp.rlow);
187
+ g_assert_cmpuint(tmp.low, ==, tmp.rlow);
188
+ g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
188
+ g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
189
+ g_assert_cmpuint(rem, ==, tmp.remainder);
189
+ g_assert_cmpuint(rem, ==, tmp.remainder);
190
+ }
190
+ }
191
+}
191
+}
192
+
192
+
193
+static void test_divs128(void)
193
+static void test_divs128(void)
194
+{
194
+{
195
+ int i;
195
+ int i;
196
+ int64_t rem;
196
+ int64_t rem;
197
+ test_data_signed tmp;
197
+ test_data_signed tmp;
198
+
198
+
199
+ for (i = 0; i < ARRAY_SIZE(test_table_signed); ++i) {
199
+ for (i = 0; i < ARRAY_SIZE(test_table_signed); ++i) {
200
+ tmp = test_table_signed[i];
200
+ tmp = test_table_signed[i];
201
+
201
+
202
+ rem = divs128(&tmp.low, &tmp.high, tmp.divisor);
202
+ rem = divs128(&tmp.low, &tmp.high, tmp.divisor);
203
+ g_assert_cmpuint(tmp.low, ==, tmp.rlow);
203
+ g_assert_cmpuint(tmp.low, ==, tmp.rlow);
204
+ g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
204
+ g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
205
+ g_assert_cmpuint(rem, ==, tmp.remainder);
205
+ g_assert_cmpuint(rem, ==, tmp.remainder);
206
+ }
206
+ }
207
+}
207
+}
208
+
208
+
209
+int main(int argc, char **argv)
209
+int main(int argc, char **argv)
210
+{
210
+{
211
+ g_test_init(&argc, &argv, NULL);
211
+ g_test_init(&argc, &argv, NULL);
212
+ g_test_add_func("/host-utils/test_divu128", test_divu128);
212
+ g_test_add_func("/host-utils/test_divu128", test_divu128);
213
+ g_test_add_func("/host-utils/test_divs128", test_divs128);
213
+ g_test_add_func("/host-utils/test_divs128", test_divs128);
214
+ return g_test_run();
214
+ return g_test_run();
215
+}
215
+}
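The call pattern exercised above is worth spelling out: the 128-bit dividend is passed as a high/low pair of uint64_t, the quotient is written back through those pointers, and the remainder is the return value. A minimal caller sketch using the last unsigned table entry (illustrative only, not part of the patch):

    #include "qemu/osdep.h"
    #include "qemu/host-utils.h"

    static void divu128_example(void)
    {
        /* dividend = 0x0000000000000001_23456789abcdef01 */
        uint64_t low  = 0x23456789abcdef01ULL;
        uint64_t high = 0x0000000000000001ULL;

        /* quotient comes back in high:low, remainder is returned */
        uint64_t rem = divu128(&low, &high, 0x123456789abcdefeULL);

        /* per the table: quotient 0xf, remainder 0x123456789abcde1f */
        g_assert(high == 0 && low == 0xf);
        g_assert(rem == 0x123456789abcde1fULL);
    }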
216
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
216
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
217
index XXXXXXX..XXXXXXX 100644
217
index XXXXXXX..XXXXXXX 100644
218
--- a/tests/unit/meson.build
218
--- a/tests/unit/meson.build
219
+++ b/tests/unit/meson.build
219
+++ b/tests/unit/meson.build
220
@@ -XXX,XX +XXX,XX @@ tests = {
220
@@ -XXX,XX +XXX,XX @@ tests = {
221
# all code tested by test-x86-cpuid is inside topology.h
221
# all code tested by test-x86-cpuid is inside topology.h
222
'test-x86-cpuid': [],
222
'test-x86-cpuid': [],
223
'test-cutils': [],
223
'test-cutils': [],
224
+ 'test-div128': [],
224
+ 'test-div128': [],
225
'test-shift128': [],
225
'test-shift128': [],
226
'test-mul64': [],
226
'test-mul64': [],
227
# all code tested by test-int128 is inside int128.h
227
# all code tested by test-int128 is inside int128.h
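With this entry in place, the new binary should be picked up by the normal unit-test run (for example via make check-unit, or by executing tests/unit/test-div128 from the build tree); the exact invocation depends on the local build setup.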
228
--
228
--
229
2.25.1
229
2.25.1
230
230
231
231

1
Prepare for tracking different masks by renaming this one.
1
Prepare for tracking different masks by renaming this one.
2
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 142 +++++++++++++++++++++++++------------------------
8
tcg/optimize.c | 142 +++++++++++++++++++++++++------------------------
9
1 file changed, 72 insertions(+), 70 deletions(-)
9
1 file changed, 72 insertions(+), 70 deletions(-)
10
10
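For context, z_mask tracks which bits of a temp may still be nonzero: a clear bit in the mask means that bit of the value is known to be zero. A rough sketch of how such a mask combines across a few common operations, independent of the QEMU code touched here (names are illustrative, not QEMU APIs):

    #include <stdint.h>

    /* A 0 bit in z_mask means the corresponding value bit is known zero. */
    static uint64_t z_and(uint64_t za, uint64_t zb) { return za & zb; }
    static uint64_t z_or (uint64_t za, uint64_t zb) { return za | zb; }
    static uint64_t z_shl(uint64_t za, unsigned c)  { return za << c; }
    static uint64_t z_shr(uint64_t za, unsigned c)  { return za >> c; }

    /*
     * Example: for (x & 0xff) | (y & 0xf0), z_or(0xff, 0xf0) == 0xff,
     * so bits 8..63 of the result are known to be zero.
     */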
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
16
TCGTemp *prev_copy;
16
TCGTemp *prev_copy;
17
TCGTemp *next_copy;
17
TCGTemp *next_copy;
18
uint64_t val;
18
uint64_t val;
19
- uint64_t mask;
19
- uint64_t mask;
20
+ uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
20
+ uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
21
} TempOptInfo;
21
} TempOptInfo;
22
22
23
static inline TempOptInfo *ts_info(TCGTemp *ts)
23
static inline TempOptInfo *ts_info(TCGTemp *ts)
24
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
24
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
25
ti->next_copy = ts;
25
ti->next_copy = ts;
26
ti->prev_copy = ts;
26
ti->prev_copy = ts;
27
ti->is_const = false;
27
ti->is_const = false;
28
- ti->mask = -1;
28
- ti->mask = -1;
29
+ ti->z_mask = -1;
29
+ ti->z_mask = -1;
30
}
30
}
31
31
32
static void reset_temp(TCGArg arg)
32
static void reset_temp(TCGArg arg)
33
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
33
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
34
if (ts->kind == TEMP_CONST) {
34
if (ts->kind == TEMP_CONST) {
35
ti->is_const = true;
35
ti->is_const = true;
36
ti->val = ts->val;
36
ti->val = ts->val;
37
- ti->mask = ts->val;
37
- ti->mask = ts->val;
38
+ ti->z_mask = ts->val;
38
+ ti->z_mask = ts->val;
39
if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
39
if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
40
/* High bits of a 32-bit quantity are garbage. */
40
/* High bits of a 32-bit quantity are garbage. */
41
- ti->mask |= ~0xffffffffull;
41
- ti->mask |= ~0xffffffffull;
42
+ ti->z_mask |= ~0xffffffffull;
42
+ ti->z_mask |= ~0xffffffffull;
43
}
43
}
44
} else {
44
} else {
45
ti->is_const = false;
45
ti->is_const = false;
46
- ti->mask = -1;
46
- ti->mask = -1;
47
+ ti->z_mask = -1;
47
+ ti->z_mask = -1;
48
}
48
}
49
}
49
}
50
50
51
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
51
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
52
const TCGOpDef *def;
52
const TCGOpDef *def;
53
TempOptInfo *di;
53
TempOptInfo *di;
54
TempOptInfo *si;
54
TempOptInfo *si;
55
- uint64_t mask;
55
- uint64_t mask;
56
+ uint64_t z_mask;
56
+ uint64_t z_mask;
57
TCGOpcode new_op;
57
TCGOpcode new_op;
58
58
59
if (ts_are_copies(dst_ts, src_ts)) {
59
if (ts_are_copies(dst_ts, src_ts)) {
60
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
60
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
61
op->args[0] = dst;
61
op->args[0] = dst;
62
op->args[1] = src;
62
op->args[1] = src;
63
63
64
- mask = si->mask;
64
- mask = si->mask;
65
+ z_mask = si->z_mask;
65
+ z_mask = si->z_mask;
66
if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
66
if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
67
/* High bits of the destination are now garbage. */
67
/* High bits of the destination are now garbage. */
68
- mask |= ~0xffffffffull;
68
- mask |= ~0xffffffffull;
69
+ z_mask |= ~0xffffffffull;
69
+ z_mask |= ~0xffffffffull;
70
}
70
}
71
- di->mask = mask;
71
- di->mask = mask;
72
+ di->z_mask = z_mask;
72
+ di->z_mask = z_mask;
73
73
74
if (src_ts->type == dst_ts->type) {
74
if (src_ts->type == dst_ts->type) {
75
TempOptInfo *ni = ts_info(si->next_copy);
75
TempOptInfo *ni = ts_info(si->next_copy);
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
77
}
77
}
78
78
79
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
79
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
80
- uint64_t mask, partmask, affected, tmp;
80
- uint64_t mask, partmask, affected, tmp;
81
+ uint64_t z_mask, partmask, affected, tmp;
81
+ uint64_t z_mask, partmask, affected, tmp;
82
int nb_oargs, nb_iargs;
82
int nb_oargs, nb_iargs;
83
TCGOpcode opc = op->opc;
83
TCGOpcode opc = op->opc;
84
const TCGOpDef *def = &tcg_op_defs[opc];
84
const TCGOpDef *def = &tcg_op_defs[opc];
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
86
87
/* Simplify using known-zero bits. Currently only ops with a single
87
/* Simplify using known-zero bits. Currently only ops with a single
88
output argument is supported. */
88
output argument is supported. */
89
- mask = -1;
89
- mask = -1;
90
+ z_mask = -1;
90
+ z_mask = -1;
91
affected = -1;
91
affected = -1;
92
switch (opc) {
92
switch (opc) {
93
CASE_OP_32_64(ext8s):
93
CASE_OP_32_64(ext8s):
94
- if ((arg_info(op->args[1])->mask & 0x80) != 0) {
94
- if ((arg_info(op->args[1])->mask & 0x80) != 0) {
95
+ if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
95
+ if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
96
break;
96
break;
97
}
97
}
98
QEMU_FALLTHROUGH;
98
QEMU_FALLTHROUGH;
99
CASE_OP_32_64(ext8u):
99
CASE_OP_32_64(ext8u):
100
- mask = 0xff;
100
- mask = 0xff;
101
+ z_mask = 0xff;
101
+ z_mask = 0xff;
102
goto and_const;
102
goto and_const;
103
CASE_OP_32_64(ext16s):
103
CASE_OP_32_64(ext16s):
104
- if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
104
- if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
105
+ if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
105
+ if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
106
break;
106
break;
107
}
107
}
108
QEMU_FALLTHROUGH;
108
QEMU_FALLTHROUGH;
109
CASE_OP_32_64(ext16u):
109
CASE_OP_32_64(ext16u):
110
- mask = 0xffff;
110
- mask = 0xffff;
111
+ z_mask = 0xffff;
111
+ z_mask = 0xffff;
112
goto and_const;
112
goto and_const;
113
case INDEX_op_ext32s_i64:
113
case INDEX_op_ext32s_i64:
114
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
114
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
115
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
115
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
116
break;
116
break;
117
}
117
}
118
QEMU_FALLTHROUGH;
118
QEMU_FALLTHROUGH;
119
case INDEX_op_ext32u_i64:
119
case INDEX_op_ext32u_i64:
120
- mask = 0xffffffffU;
120
- mask = 0xffffffffU;
121
+ z_mask = 0xffffffffU;
121
+ z_mask = 0xffffffffU;
122
goto and_const;
122
goto and_const;
123
123
124
CASE_OP_32_64(and):
124
CASE_OP_32_64(and):
125
- mask = arg_info(op->args[2])->mask;
125
- mask = arg_info(op->args[2])->mask;
126
+ z_mask = arg_info(op->args[2])->z_mask;
126
+ z_mask = arg_info(op->args[2])->z_mask;
127
if (arg_is_const(op->args[2])) {
127
if (arg_is_const(op->args[2])) {
128
and_const:
128
and_const:
129
- affected = arg_info(op->args[1])->mask & ~mask;
129
- affected = arg_info(op->args[1])->mask & ~mask;
130
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
130
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
131
}
131
}
132
- mask = arg_info(op->args[1])->mask & mask;
132
- mask = arg_info(op->args[1])->mask & mask;
133
+ z_mask = arg_info(op->args[1])->z_mask & z_mask;
133
+ z_mask = arg_info(op->args[1])->z_mask & z_mask;
134
break;
134
break;
135
135
136
case INDEX_op_ext_i32_i64:
136
case INDEX_op_ext_i32_i64:
137
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
137
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
138
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
138
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
139
break;
139
break;
140
}
140
}
141
QEMU_FALLTHROUGH;
141
QEMU_FALLTHROUGH;
142
case INDEX_op_extu_i32_i64:
142
case INDEX_op_extu_i32_i64:
143
/* We do not compute affected as it is a size changing op. */
143
/* We do not compute affected as it is a size changing op. */
144
- mask = (uint32_t)arg_info(op->args[1])->mask;
144
- mask = (uint32_t)arg_info(op->args[1])->mask;
145
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
145
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
146
break;
146
break;
147
147
148
CASE_OP_32_64(andc):
148
CASE_OP_32_64(andc):
149
/* Known-zeros does not imply known-ones. Therefore unless
149
/* Known-zeros does not imply known-ones. Therefore unless
150
op->args[2] is constant, we can't infer anything from it. */
150
op->args[2] is constant, we can't infer anything from it. */
151
if (arg_is_const(op->args[2])) {
151
if (arg_is_const(op->args[2])) {
152
- mask = ~arg_info(op->args[2])->mask;
152
- mask = ~arg_info(op->args[2])->mask;
153
+ z_mask = ~arg_info(op->args[2])->z_mask;
153
+ z_mask = ~arg_info(op->args[2])->z_mask;
154
goto and_const;
154
goto and_const;
155
}
155
}
156
/* But we certainly know nothing outside args[1] may be set. */
156
/* But we certainly know nothing outside args[1] may be set. */
157
- mask = arg_info(op->args[1])->mask;
157
- mask = arg_info(op->args[1])->mask;
158
+ z_mask = arg_info(op->args[1])->z_mask;
158
+ z_mask = arg_info(op->args[1])->z_mask;
159
break;
159
break;
160
160
161
case INDEX_op_sar_i32:
161
case INDEX_op_sar_i32:
162
if (arg_is_const(op->args[2])) {
162
if (arg_is_const(op->args[2])) {
163
tmp = arg_info(op->args[2])->val & 31;
163
tmp = arg_info(op->args[2])->val & 31;
164
- mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
164
- mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
165
+ z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
165
+ z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
166
}
166
}
167
break;
167
break;
168
case INDEX_op_sar_i64:
168
case INDEX_op_sar_i64:
169
if (arg_is_const(op->args[2])) {
169
if (arg_is_const(op->args[2])) {
170
tmp = arg_info(op->args[2])->val & 63;
170
tmp = arg_info(op->args[2])->val & 63;
171
- mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
171
- mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
172
+ z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
172
+ z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
173
}
173
}
174
break;
174
break;
175
175
176
case INDEX_op_shr_i32:
176
case INDEX_op_shr_i32:
177
if (arg_is_const(op->args[2])) {
177
if (arg_is_const(op->args[2])) {
178
tmp = arg_info(op->args[2])->val & 31;
178
tmp = arg_info(op->args[2])->val & 31;
179
- mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
179
- mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
180
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
180
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
181
}
181
}
182
break;
182
break;
183
case INDEX_op_shr_i64:
183
case INDEX_op_shr_i64:
184
if (arg_is_const(op->args[2])) {
184
if (arg_is_const(op->args[2])) {
185
tmp = arg_info(op->args[2])->val & 63;
185
tmp = arg_info(op->args[2])->val & 63;
186
- mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
186
- mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
187
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
187
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
188
}
188
}
189
break;
189
break;
190
190
191
case INDEX_op_extrl_i64_i32:
191
case INDEX_op_extrl_i64_i32:
192
- mask = (uint32_t)arg_info(op->args[1])->mask;
192
- mask = (uint32_t)arg_info(op->args[1])->mask;
193
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
193
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
194
break;
194
break;
195
case INDEX_op_extrh_i64_i32:
195
case INDEX_op_extrh_i64_i32:
196
- mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
196
- mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
197
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
197
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
198
break;
198
break;
199
199
200
CASE_OP_32_64(shl):
200
CASE_OP_32_64(shl):
201
if (arg_is_const(op->args[2])) {
201
if (arg_is_const(op->args[2])) {
202
tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
202
tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
203
- mask = arg_info(op->args[1])->mask << tmp;
203
- mask = arg_info(op->args[1])->mask << tmp;
204
+ z_mask = arg_info(op->args[1])->z_mask << tmp;
204
+ z_mask = arg_info(op->args[1])->z_mask << tmp;
205
}
205
}
206
break;
206
break;
207
207
208
CASE_OP_32_64(neg):
208
CASE_OP_32_64(neg):
209
/* Set to 1 all bits to the left of the rightmost. */
209
/* Set to 1 all bits to the left of the rightmost. */
210
- mask = -(arg_info(op->args[1])->mask
210
- mask = -(arg_info(op->args[1])->mask
211
- & -arg_info(op->args[1])->mask);
211
- & -arg_info(op->args[1])->mask);
212
+ z_mask = -(arg_info(op->args[1])->z_mask
212
+ z_mask = -(arg_info(op->args[1])->z_mask
213
+ & -arg_info(op->args[1])->z_mask);
213
+ & -arg_info(op->args[1])->z_mask);
214
break;
214
break;
215
215
216
CASE_OP_32_64(deposit):
216
CASE_OP_32_64(deposit):
217
- mask = deposit64(arg_info(op->args[1])->mask,
217
- mask = deposit64(arg_info(op->args[1])->mask,
218
- op->args[3], op->args[4],
218
- op->args[3], op->args[4],
219
- arg_info(op->args[2])->mask);
219
- arg_info(op->args[2])->mask);
220
+ z_mask = deposit64(arg_info(op->args[1])->z_mask,
220
+ z_mask = deposit64(arg_info(op->args[1])->z_mask,
221
+ op->args[3], op->args[4],
221
+ op->args[3], op->args[4],
222
+ arg_info(op->args[2])->z_mask);
222
+ arg_info(op->args[2])->z_mask);
223
break;
223
break;
224
224
225
CASE_OP_32_64(extract):
225
CASE_OP_32_64(extract):
226
- mask = extract64(arg_info(op->args[1])->mask,
226
- mask = extract64(arg_info(op->args[1])->mask,
227
- op->args[2], op->args[3]);
227
- op->args[2], op->args[3]);
228
+ z_mask = extract64(arg_info(op->args[1])->z_mask,
228
+ z_mask = extract64(arg_info(op->args[1])->z_mask,
229
+ op->args[2], op->args[3]);
229
+ op->args[2], op->args[3]);
230
if (op->args[2] == 0) {
230
if (op->args[2] == 0) {
231
- affected = arg_info(op->args[1])->mask & ~mask;
231
- affected = arg_info(op->args[1])->mask & ~mask;
232
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
232
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
233
}
233
}
234
break;
234
break;
235
CASE_OP_32_64(sextract):
235
CASE_OP_32_64(sextract):
236
- mask = sextract64(arg_info(op->args[1])->mask,
236
- mask = sextract64(arg_info(op->args[1])->mask,
237
- op->args[2], op->args[3]);
237
- op->args[2], op->args[3]);
238
- if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
238
- if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
239
- affected = arg_info(op->args[1])->mask & ~mask;
239
- affected = arg_info(op->args[1])->mask & ~mask;
240
+ z_mask = sextract64(arg_info(op->args[1])->z_mask,
240
+ z_mask = sextract64(arg_info(op->args[1])->z_mask,
241
+ op->args[2], op->args[3]);
241
+ op->args[2], op->args[3]);
242
+ if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
242
+ if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
243
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
243
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
244
}
244
}
245
break;
245
break;
246
246
247
CASE_OP_32_64(or):
247
CASE_OP_32_64(or):
248
CASE_OP_32_64(xor):
248
CASE_OP_32_64(xor):
249
- mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
249
- mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
250
+ z_mask = arg_info(op->args[1])->z_mask
250
+ z_mask = arg_info(op->args[1])->z_mask
251
+ | arg_info(op->args[2])->z_mask;
251
+ | arg_info(op->args[2])->z_mask;
252
break;
252
break;
253
253
254
case INDEX_op_clz_i32:
254
case INDEX_op_clz_i32:
255
case INDEX_op_ctz_i32:
255
case INDEX_op_ctz_i32:
256
- mask = arg_info(op->args[2])->mask | 31;
256
- mask = arg_info(op->args[2])->mask | 31;
257
+ z_mask = arg_info(op->args[2])->z_mask | 31;
257
+ z_mask = arg_info(op->args[2])->z_mask | 31;
258
break;
258
break;
259
259
260
case INDEX_op_clz_i64:
260
case INDEX_op_clz_i64:
261
case INDEX_op_ctz_i64:
261
case INDEX_op_ctz_i64:
262
- mask = arg_info(op->args[2])->mask | 63;
262
- mask = arg_info(op->args[2])->mask | 63;
263
+ z_mask = arg_info(op->args[2])->z_mask | 63;
263
+ z_mask = arg_info(op->args[2])->z_mask | 63;
264
break;
264
break;
265
265
266
case INDEX_op_ctpop_i32:
266
case INDEX_op_ctpop_i32:
267
- mask = 32 | 31;
267
- mask = 32 | 31;
268
+ z_mask = 32 | 31;
268
+ z_mask = 32 | 31;
269
break;
269
break;
270
case INDEX_op_ctpop_i64:
270
case INDEX_op_ctpop_i64:
271
- mask = 64 | 63;
271
- mask = 64 | 63;
272
+ z_mask = 64 | 63;
272
+ z_mask = 64 | 63;
273
break;
273
break;
274
274
275
CASE_OP_32_64(setcond):
275
CASE_OP_32_64(setcond):
276
case INDEX_op_setcond2_i32:
276
case INDEX_op_setcond2_i32:
277
- mask = 1;
277
- mask = 1;
278
+ z_mask = 1;
278
+ z_mask = 1;
279
break;
279
break;
280
280
281
CASE_OP_32_64(movcond):
281
CASE_OP_32_64(movcond):
282
- mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
282
- mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
283
+ z_mask = arg_info(op->args[3])->z_mask
283
+ z_mask = arg_info(op->args[3])->z_mask
284
+ | arg_info(op->args[4])->z_mask;
284
+ | arg_info(op->args[4])->z_mask;
285
break;
285
break;
286
286
287
CASE_OP_32_64(ld8u):
287
CASE_OP_32_64(ld8u):
288
- mask = 0xff;
288
- mask = 0xff;
289
+ z_mask = 0xff;
289
+ z_mask = 0xff;
290
break;
290
break;
291
CASE_OP_32_64(ld16u):
291
CASE_OP_32_64(ld16u):
292
- mask = 0xffff;
292
- mask = 0xffff;
293
+ z_mask = 0xffff;
293
+ z_mask = 0xffff;
294
break;
294
break;
295
case INDEX_op_ld32u_i64:
295
case INDEX_op_ld32u_i64:
296
- mask = 0xffffffffu;
296
- mask = 0xffffffffu;
297
+ z_mask = 0xffffffffu;
297
+ z_mask = 0xffffffffu;
298
break;
298
break;
299
299
300
CASE_OP_32_64(qemu_ld):
300
CASE_OP_32_64(qemu_ld):
301
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
301
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
302
MemOpIdx oi = op->args[nb_oargs + nb_iargs];
302
MemOpIdx oi = op->args[nb_oargs + nb_iargs];
303
MemOp mop = get_memop(oi);
303
MemOp mop = get_memop(oi);
304
if (!(mop & MO_SIGN)) {
304
if (!(mop & MO_SIGN)) {
305
- mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
305
- mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
306
+ z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
306
+ z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
307
}
307
}
308
}
308
}
309
break;
309
break;
310
310
311
CASE_OP_32_64(bswap16):
311
CASE_OP_32_64(bswap16):
312
- mask = arg_info(op->args[1])->mask;
312
- mask = arg_info(op->args[1])->mask;
313
- if (mask <= 0xffff) {
313
- if (mask <= 0xffff) {
314
+ z_mask = arg_info(op->args[1])->z_mask;
314
+ z_mask = arg_info(op->args[1])->z_mask;
315
+ if (z_mask <= 0xffff) {
315
+ if (z_mask <= 0xffff) {
316
op->args[2] |= TCG_BSWAP_IZ;
316
op->args[2] |= TCG_BSWAP_IZ;
317
}
317
}
318
- mask = bswap16(mask);
318
- mask = bswap16(mask);
319
+ z_mask = bswap16(z_mask);
319
+ z_mask = bswap16(z_mask);
320
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
320
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
321
case TCG_BSWAP_OZ:
321
case TCG_BSWAP_OZ:
322
break;
322
break;
323
case TCG_BSWAP_OS:
323
case TCG_BSWAP_OS:
324
- mask = (int16_t)mask;
324
- mask = (int16_t)mask;
325
+ z_mask = (int16_t)z_mask;
325
+ z_mask = (int16_t)z_mask;
326
break;
326
break;
327
default: /* undefined high bits */
327
default: /* undefined high bits */
328
- mask |= MAKE_64BIT_MASK(16, 48);
328
- mask |= MAKE_64BIT_MASK(16, 48);
329
+ z_mask |= MAKE_64BIT_MASK(16, 48);
329
+ z_mask |= MAKE_64BIT_MASK(16, 48);
330
break;
330
break;
331
}
331
}
332
break;
332
break;
333
333
334
case INDEX_op_bswap32_i64:
334
case INDEX_op_bswap32_i64:
335
- mask = arg_info(op->args[1])->mask;
335
- mask = arg_info(op->args[1])->mask;
336
- if (mask <= 0xffffffffu) {
336
- if (mask <= 0xffffffffu) {
337
+ z_mask = arg_info(op->args[1])->z_mask;
337
+ z_mask = arg_info(op->args[1])->z_mask;
338
+ if (z_mask <= 0xffffffffu) {
338
+ if (z_mask <= 0xffffffffu) {
339
op->args[2] |= TCG_BSWAP_IZ;
339
op->args[2] |= TCG_BSWAP_IZ;
340
}
340
}
341
- mask = bswap32(mask);
341
- mask = bswap32(mask);
342
+ z_mask = bswap32(z_mask);
342
+ z_mask = bswap32(z_mask);
343
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
343
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
344
case TCG_BSWAP_OZ:
344
case TCG_BSWAP_OZ:
345
break;
345
break;
346
case TCG_BSWAP_OS:
346
case TCG_BSWAP_OS:
347
- mask = (int32_t)mask;
347
- mask = (int32_t)mask;
348
+ z_mask = (int32_t)z_mask;
348
+ z_mask = (int32_t)z_mask;
349
break;
349
break;
350
default: /* undefined high bits */
350
default: /* undefined high bits */
351
- mask |= MAKE_64BIT_MASK(32, 32);
351
- mask |= MAKE_64BIT_MASK(32, 32);
352
+ z_mask |= MAKE_64BIT_MASK(32, 32);
352
+ z_mask |= MAKE_64BIT_MASK(32, 32);
353
break;
353
break;
354
}
354
}
355
break;
355
break;
356
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
356
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
357
/* 32-bit ops generate 32-bit results. For the result is zero test
357
/* 32-bit ops generate 32-bit results. For the result is zero test
358
below, we can ignore high bits, but for further optimizations we
358
below, we can ignore high bits, but for further optimizations we
359
need to record that the high bits contain garbage. */
359
need to record that the high bits contain garbage. */
360
- partmask = mask;
360
- partmask = mask;
361
+ partmask = z_mask;
361
+ partmask = z_mask;
362
if (!(def->flags & TCG_OPF_64BIT)) {
362
if (!(def->flags & TCG_OPF_64BIT)) {
363
- mask |= ~(tcg_target_ulong)0xffffffffu;
363
- mask |= ~(tcg_target_ulong)0xffffffffu;
364
+ z_mask |= ~(tcg_target_ulong)0xffffffffu;
364
+ z_mask |= ~(tcg_target_ulong)0xffffffffu;
365
partmask &= 0xffffffffu;
365
partmask &= 0xffffffffu;
366
affected &= 0xffffffffu;
366
affected &= 0xffffffffu;
367
}
367
}
368
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
368
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
369
vs the high word of the input. */
369
vs the high word of the input. */
370
do_setcond_high:
370
do_setcond_high:
371
reset_temp(op->args[0]);
371
reset_temp(op->args[0]);
372
- arg_info(op->args[0])->mask = 1;
372
- arg_info(op->args[0])->mask = 1;
373
+ arg_info(op->args[0])->z_mask = 1;
373
+ arg_info(op->args[0])->z_mask = 1;
374
op->opc = INDEX_op_setcond_i32;
374
op->opc = INDEX_op_setcond_i32;
375
op->args[1] = op->args[2];
375
op->args[1] = op->args[2];
376
op->args[2] = op->args[4];
376
op->args[2] = op->args[4];
377
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
377
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
378
}
378
}
379
do_setcond_low:
379
do_setcond_low:
380
reset_temp(op->args[0]);
380
reset_temp(op->args[0]);
381
- arg_info(op->args[0])->mask = 1;
381
- arg_info(op->args[0])->mask = 1;
382
+ arg_info(op->args[0])->z_mask = 1;
382
+ arg_info(op->args[0])->z_mask = 1;
383
op->opc = INDEX_op_setcond_i32;
383
op->opc = INDEX_op_setcond_i32;
384
op->args[2] = op->args[3];
384
op->args[2] = op->args[3];
385
op->args[3] = op->args[5];
385
op->args[3] = op->args[5];
386
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
386
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
387
/* Default case: we know nothing about operation (or were unable
387
/* Default case: we know nothing about operation (or were unable
388
to compute the operation result) so no propagation is done.
388
to compute the operation result) so no propagation is done.
389
We trash everything if the operation is the end of a basic
389
We trash everything if the operation is the end of a basic
390
- block, otherwise we only trash the output args. "mask" is
390
- block, otherwise we only trash the output args. "mask" is
391
+ block, otherwise we only trash the output args. "z_mask" is
391
+ block, otherwise we only trash the output args. "z_mask" is
392
the non-zero bits mask for the first output arg. */
392
the non-zero bits mask for the first output arg. */
393
if (def->flags & TCG_OPF_BB_END) {
393
if (def->flags & TCG_OPF_BB_END) {
394
memset(&temps_used, 0, sizeof(temps_used));
394
memset(&temps_used, 0, sizeof(temps_used));
395
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
395
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
396
/* Save the corresponding known-zero bits mask for the
396
/* Save the corresponding known-zero bits mask for the
397
first output argument (only one supported so far). */
397
first output argument (only one supported so far). */
398
if (i == 0) {
398
if (i == 0) {
399
- arg_info(op->args[i])->mask = mask;
399
- arg_info(op->args[i])->mask = mask;
400
+ arg_info(op->args[i])->z_mask = z_mask;
400
+ arg_info(op->args[i])->z_mask = z_mask;
401
}
401
}
402
}
402
}
403
}
403
}
404
--
404
--
405
2.25.1
405
2.25.1
406
406
407
407
1
Provide what will become a larger context for splitting
1
Provide what will become a larger context for splitting
2
the very large tcg_optimize function.
2
the very large tcg_optimize function.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
tcg/optimize.c | 77 ++++++++++++++++++++++++++------------------------
9
tcg/optimize.c | 77 ++++++++++++++++++++++++++------------------------
10
1 file changed, 40 insertions(+), 37 deletions(-)
10
1 file changed, 40 insertions(+), 37 deletions(-)
11
11
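The refactoring pattern here is to gather the pass-wide state into one context struct and hand a pointer to it to every helper, so later patches can grow the state without changing each signature again. A generic sketch of the shape, using hypothetical names rather than the QEMU types:

    #include <stdbool.h>

    typedef struct PassContext {
        bool temp_used[64];       /* state that used to be a local/parameter */
        /* later patches can add more fields here without touching callers */
    } PassContext;

    static void mark_temp_used(PassContext *ctx, int idx)
    {
        ctx->temp_used[idx] = true;   /* helpers take the whole context */
    }

    static void run_pass(void)
    {
        PassContext ctx = {};         /* zero-initialized, like OptContext */
        mark_temp_used(&ctx, 3);
    }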
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
17
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
17
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
18
} TempOptInfo;
18
} TempOptInfo;
19
19
20
+typedef struct OptContext {
20
+typedef struct OptContext {
21
+ TCGTempSet temps_used;
21
+ TCGTempSet temps_used;
22
+} OptContext;
22
+} OptContext;
23
+
23
+
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
25
{
25
{
26
return ts->state_ptr;
26
return ts->state_ptr;
27
@@ -XXX,XX +XXX,XX @@ static void reset_temp(TCGArg arg)
27
@@ -XXX,XX +XXX,XX @@ static void reset_temp(TCGArg arg)
28
}
28
}
29
29
30
/* Initialize and activate a temporary. */
30
/* Initialize and activate a temporary. */
31
-static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
31
-static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
32
+static void init_ts_info(OptContext *ctx, TCGTemp *ts)
32
+static void init_ts_info(OptContext *ctx, TCGTemp *ts)
33
{
33
{
34
size_t idx = temp_idx(ts);
34
size_t idx = temp_idx(ts);
35
TempOptInfo *ti;
35
TempOptInfo *ti;
36
36
37
- if (test_bit(idx, temps_used->l)) {
37
- if (test_bit(idx, temps_used->l)) {
38
+ if (test_bit(idx, ctx->temps_used.l)) {
38
+ if (test_bit(idx, ctx->temps_used.l)) {
39
return;
39
return;
40
}
40
}
41
- set_bit(idx, temps_used->l);
41
- set_bit(idx, temps_used->l);
42
+ set_bit(idx, ctx->temps_used.l);
42
+ set_bit(idx, ctx->temps_used.l);
43
43
44
ti = ts->state_ptr;
44
ti = ts->state_ptr;
45
if (ti == NULL) {
45
if (ti == NULL) {
46
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
46
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
47
}
47
}
48
}
48
}
49
49
50
-static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
50
-static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
51
+static void init_arg_info(OptContext *ctx, TCGArg arg)
51
+static void init_arg_info(OptContext *ctx, TCGArg arg)
52
{
52
{
53
- init_ts_info(temps_used, arg_temp(arg));
53
- init_ts_info(temps_used, arg_temp(arg));
54
+ init_ts_info(ctx, arg_temp(arg));
54
+ init_ts_info(ctx, arg_temp(arg));
55
}
55
}
56
56
57
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
57
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
58
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
58
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
59
}
59
}
60
}
60
}
61
61
62
-static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
62
-static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
63
+static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
63
+static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
64
TCGOp *op, TCGArg dst, uint64_t val)
64
TCGOp *op, TCGArg dst, uint64_t val)
65
{
65
{
66
const TCGOpDef *def = &tcg_op_defs[op->opc];
66
const TCGOpDef *def = &tcg_op_defs[op->opc];
67
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
67
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
68
68
69
/* Convert movi to mov with constant temp. */
69
/* Convert movi to mov with constant temp. */
70
tv = tcg_constant_internal(type, val);
70
tv = tcg_constant_internal(type, val);
71
- init_ts_info(temps_used, tv);
71
- init_ts_info(temps_used, tv);
72
+ init_ts_info(ctx, tv);
72
+ init_ts_info(ctx, tv);
73
tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
73
tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
74
}
74
}
75
75
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
77
{
77
{
78
int nb_temps, nb_globals, i;
78
int nb_temps, nb_globals, i;
79
TCGOp *op, *op_next, *prev_mb = NULL;
79
TCGOp *op, *op_next, *prev_mb = NULL;
80
- TCGTempSet temps_used;
80
- TCGTempSet temps_used;
81
+ OptContext ctx = {};
81
+ OptContext ctx = {};
82
82
83
/* Array VALS has an element for each temp.
83
/* Array VALS has an element for each temp.
84
If this temp holds a constant then its value is kept in VALS' element.
84
If this temp holds a constant then its value is kept in VALS' element.
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
nb_temps = s->nb_temps;
86
nb_temps = s->nb_temps;
87
nb_globals = s->nb_globals;
87
nb_globals = s->nb_globals;
88
88
89
- memset(&temps_used, 0, sizeof(temps_used));
89
- memset(&temps_used, 0, sizeof(temps_used));
90
for (i = 0; i < nb_temps; ++i) {
90
for (i = 0; i < nb_temps; ++i) {
91
s->temps[i].state_ptr = NULL;
91
s->temps[i].state_ptr = NULL;
92
}
92
}
93
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
93
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
94
for (i = 0; i < nb_oargs + nb_iargs; i++) {
94
for (i = 0; i < nb_oargs + nb_iargs; i++) {
95
TCGTemp *ts = arg_temp(op->args[i]);
95
TCGTemp *ts = arg_temp(op->args[i]);
96
if (ts) {
96
if (ts) {
97
- init_ts_info(&temps_used, ts);
97
- init_ts_info(&temps_used, ts);
98
+ init_ts_info(&ctx, ts);
98
+ init_ts_info(&ctx, ts);
99
}
99
}
100
}
100
}
101
} else {
101
} else {
102
nb_oargs = def->nb_oargs;
102
nb_oargs = def->nb_oargs;
103
nb_iargs = def->nb_iargs;
103
nb_iargs = def->nb_iargs;
104
for (i = 0; i < nb_oargs + nb_iargs; i++) {
104
for (i = 0; i < nb_oargs + nb_iargs; i++) {
105
- init_arg_info(&temps_used, op->args[i]);
105
- init_arg_info(&temps_used, op->args[i]);
106
+ init_arg_info(&ctx, op->args[i]);
106
+ init_arg_info(&ctx, op->args[i]);
107
}
107
}
108
}
108
}
109
109
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
CASE_OP_32_64(rotr):
111
CASE_OP_32_64(rotr):
112
if (arg_is_const(op->args[1])
112
if (arg_is_const(op->args[1])
113
&& arg_info(op->args[1])->val == 0) {
113
&& arg_info(op->args[1])->val == 0) {
114
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
114
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
115
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
115
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
116
continue;
116
continue;
117
}
117
}
118
break;
118
break;
119
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
119
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
120
120
121
if (partmask == 0) {
121
if (partmask == 0) {
122
tcg_debug_assert(nb_oargs == 1);
122
tcg_debug_assert(nb_oargs == 1);
123
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
123
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
124
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
124
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
125
continue;
125
continue;
126
}
126
}
127
if (affected == 0) {
127
if (affected == 0) {
128
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
128
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
129
CASE_OP_32_64(mulsh):
129
CASE_OP_32_64(mulsh):
130
if (arg_is_const(op->args[2])
130
if (arg_is_const(op->args[2])
131
&& arg_info(op->args[2])->val == 0) {
131
&& arg_info(op->args[2])->val == 0) {
132
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
132
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
133
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
133
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
134
continue;
134
continue;
135
}
135
}
136
break;
136
break;
137
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
137
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
138
CASE_OP_32_64_VEC(sub):
138
CASE_OP_32_64_VEC(sub):
139
CASE_OP_32_64_VEC(xor):
139
CASE_OP_32_64_VEC(xor):
140
if (args_are_copies(op->args[1], op->args[2])) {
140
if (args_are_copies(op->args[1], op->args[2])) {
141
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
141
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
142
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
142
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
143
continue;
143
continue;
144
}
144
}
145
break;
145
break;
146
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
146
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
147
if (arg_is_const(op->args[1])) {
147
if (arg_is_const(op->args[1])) {
148
tmp = arg_info(op->args[1])->val;
148
tmp = arg_info(op->args[1])->val;
149
tmp = dup_const(TCGOP_VECE(op), tmp);
149
tmp = dup_const(TCGOP_VECE(op), tmp);
150
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
150
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
151
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
151
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
152
break;
152
break;
153
}
153
}
154
goto do_default;
154
goto do_default;
155
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
155
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
156
case INDEX_op_dup2_vec:
156
case INDEX_op_dup2_vec:
157
assert(TCG_TARGET_REG_BITS == 32);
157
assert(TCG_TARGET_REG_BITS == 32);
158
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
158
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
159
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
159
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
160
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0],
160
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0],
161
deposit64(arg_info(op->args[1])->val, 32, 32,
161
deposit64(arg_info(op->args[1])->val, 32, 32,
162
arg_info(op->args[2])->val));
162
arg_info(op->args[2])->val));
163
break;
163
break;
164
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
164
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
165
case INDEX_op_extrh_i64_i32:
165
case INDEX_op_extrh_i64_i32:
166
if (arg_is_const(op->args[1])) {
166
if (arg_is_const(op->args[1])) {
167
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
167
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
168
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
168
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
169
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
169
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
break;
170
break;
171
}
171
}
172
goto do_default;
172
goto do_default;
173
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
173
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
174
if (arg_is_const(op->args[1])) {
174
if (arg_is_const(op->args[1])) {
175
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
175
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
176
op->args[2]);
176
op->args[2]);
177
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
177
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
178
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
178
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
break;
179
break;
180
}
180
}
181
goto do_default;
181
goto do_default;
182
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
182
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
183
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
183
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
184
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
184
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
185
arg_info(op->args[2])->val);
185
arg_info(op->args[2])->val);
186
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
186
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
187
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
187
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
break;
188
break;
189
}
189
}
190
goto do_default;
190
goto do_default;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
TCGArg v = arg_info(op->args[1])->val;
192
TCGArg v = arg_info(op->args[1])->val;
193
if (v != 0) {
193
if (v != 0) {
194
tmp = do_constant_folding(opc, v, 0);
194
tmp = do_constant_folding(opc, v, 0);
195
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
195
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
196
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
196
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
} else {
197
} else {
198
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
198
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
199
}
199
}
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
tmp = deposit64(arg_info(op->args[1])->val,
201
tmp = deposit64(arg_info(op->args[1])->val,
202
op->args[3], op->args[4],
202
op->args[3], op->args[4],
203
arg_info(op->args[2])->val);
203
arg_info(op->args[2])->val);
204
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
204
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
205
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
205
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
206
break;
206
break;
207
}
207
}
208
goto do_default;
208
goto do_default;
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
if (arg_is_const(op->args[1])) {
210
if (arg_is_const(op->args[1])) {
211
tmp = extract64(arg_info(op->args[1])->val,
211
tmp = extract64(arg_info(op->args[1])->val,
212
op->args[2], op->args[3]);
212
op->args[2], op->args[3]);
213
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
213
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
214
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
214
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
215
break;
215
break;
216
}
216
}
217
goto do_default;
217
goto do_default;
218
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
218
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
219
if (arg_is_const(op->args[1])) {
219
if (arg_is_const(op->args[1])) {
220
tmp = sextract64(arg_info(op->args[1])->val,
220
tmp = sextract64(arg_info(op->args[1])->val,
221
op->args[2], op->args[3]);
221
op->args[2], op->args[3]);
222
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
222
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
223
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
223
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
224
break;
224
break;
225
}
225
}
226
goto do_default;
226
goto do_default;
227
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
227
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
228
tmp = (int32_t)(((uint32_t)v1 >> shr) |
228
tmp = (int32_t)(((uint32_t)v1 >> shr) |
229
((uint32_t)v2 << (32 - shr)));
229
((uint32_t)v2 << (32 - shr)));
230
}
230
}
231
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
231
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
232
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
232
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
233
break;
233
break;
234
}
234
}
235
goto do_default;
235
goto do_default;
236
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
236
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
237
tmp = do_constant_folding_cond(opc, op->args[1],
237
tmp = do_constant_folding_cond(opc, op->args[1],
238
op->args[2], op->args[3]);
238
op->args[2], op->args[3]);
239
if (tmp != 2) {
239
if (tmp != 2) {
240
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
240
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
241
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
241
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
242
break;
242
break;
243
}
243
}
244
goto do_default;
244
goto do_default;
245
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
245
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
246
op->args[1], op->args[2]);
246
op->args[1], op->args[2]);
247
if (tmp != 2) {
247
if (tmp != 2) {
248
if (tmp) {
248
if (tmp) {
249
- memset(&temps_used, 0, sizeof(temps_used));
249
- memset(&temps_used, 0, sizeof(temps_used));
250
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
250
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
251
op->opc = INDEX_op_br;
251
op->opc = INDEX_op_br;
252
op->args[0] = op->args[3];
252
op->args[0] = op->args[3];
253
} else {
253
} else {
254
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
254
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
255
255
256
rl = op->args[0];
256
rl = op->args[0];
257
rh = op->args[1];
257
rh = op->args[1];
258
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
258
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
259
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
259
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
260
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
260
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
261
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
261
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
262
break;
262
break;
263
}
263
}
264
goto do_default;
264
goto do_default;
265
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
265
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
266
266
267
rl = op->args[0];
267
rl = op->args[0];
268
rh = op->args[1];
268
rh = op->args[1];
269
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
269
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
270
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
270
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
271
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
271
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
272
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
272
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
273
break;
273
break;
274
}
274
}
275
goto do_default;
275
goto do_default;
276
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
276
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
277
if (tmp != 2) {
277
if (tmp != 2) {
278
if (tmp) {
278
if (tmp) {
279
do_brcond_true:
279
do_brcond_true:
280
- memset(&temps_used, 0, sizeof(temps_used));
280
- memset(&temps_used, 0, sizeof(temps_used));
281
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
281
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
282
op->opc = INDEX_op_br;
282
op->opc = INDEX_op_br;
283
op->args[0] = op->args[5];
283
op->args[0] = op->args[5];
284
} else {
284
} else {
285
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
285
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
286
/* Simplify LT/GE comparisons vs zero to a single compare
286
/* Simplify LT/GE comparisons vs zero to a single compare
287
vs the high word of the input. */
287
vs the high word of the input. */
288
do_brcond_high:
288
do_brcond_high:
289
- memset(&temps_used, 0, sizeof(temps_used));
289
- memset(&temps_used, 0, sizeof(temps_used));
290
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
290
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
291
op->opc = INDEX_op_brcond_i32;
291
op->opc = INDEX_op_brcond_i32;
292
op->args[0] = op->args[1];
292
op->args[0] = op->args[1];
293
op->args[1] = op->args[3];
293
op->args[1] = op->args[3];
294
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
294
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
295
goto do_default;
295
goto do_default;
296
}
296
}
297
do_brcond_low:
297
do_brcond_low:
298
- memset(&temps_used, 0, sizeof(temps_used));
298
- memset(&temps_used, 0, sizeof(temps_used));
299
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
299
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
300
op->opc = INDEX_op_brcond_i32;
300
op->opc = INDEX_op_brcond_i32;
301
op->args[1] = op->args[2];
301
op->args[1] = op->args[2];
302
op->args[2] = op->args[4];
302
op->args[2] = op->args[4];
303
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
303
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
304
op->args[5]);
304
op->args[5]);
305
if (tmp != 2) {
305
if (tmp != 2) {
306
do_setcond_const:
306
do_setcond_const:
307
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
307
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
308
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
308
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
309
} else if ((op->args[5] == TCG_COND_LT
309
} else if ((op->args[5] == TCG_COND_LT
310
|| op->args[5] == TCG_COND_GE)
310
|| op->args[5] == TCG_COND_GE)
311
&& arg_is_const(op->args[3])
311
&& arg_is_const(op->args[3])
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
if (!(tcg_call_flags(op)
313
if (!(tcg_call_flags(op)
314
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
314
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
315
for (i = 0; i < nb_globals; i++) {
315
for (i = 0; i < nb_globals; i++) {
316
- if (test_bit(i, temps_used.l)) {
316
- if (test_bit(i, temps_used.l)) {
317
+ if (test_bit(i, ctx.temps_used.l)) {
317
+ if (test_bit(i, ctx.temps_used.l)) {
318
reset_ts(&s->temps[i]);
318
reset_ts(&s->temps[i]);
319
}
319
}
320
}
320
}
321
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
321
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
322
block, otherwise we only trash the output args. "z_mask" is
322
block, otherwise we only trash the output args. "z_mask" is
323
the non-zero bits mask for the first output arg. */
323
the non-zero bits mask for the first output arg. */
324
if (def->flags & TCG_OPF_BB_END) {
324
if (def->flags & TCG_OPF_BB_END) {
325
- memset(&temps_used, 0, sizeof(temps_used));
325
- memset(&temps_used, 0, sizeof(temps_used));
326
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
326
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
327
} else {
327
} else {
328
do_reset_output:
328
do_reset_output:
329
for (i = 0; i < nb_oargs; i++) {
329
for (i = 0; i < nb_oargs; i++) {
--
2.25.1

Break the final cleanup clause out of the main switch
statement. When fully folding an opcode to mov/movi,
use "continue" to process the next opcode, else break
to fall into the final cleanup.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 190 ++++++++++++++++++++++++-------------------------
 1 file changed, 94 insertions(+), 96 deletions(-)

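As a rough sketch of the control flow this patch moves to (illustrative only: the Op type and helper names below are invented, not the QEMU code), a fully folded opcode skips the shared cleanup with "continue", while every other case breaks out of the switch and falls into a single cleanup block at the bottom of the loop:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for TCGOp and the optimizer state. */
    typedef struct Op {
        int opc;
        bool can_fold;
    } Op;

    static void reset_outputs(Op *op)
    {
        /* Shared cleanup; plays the role of the old do_default label. */
        printf("reset outputs of op %d\n", op->opc);
    }

    static void optimize(Op *ops, int n)
    {
        for (int i = 0; i < n; i++) {
            Op *op = &ops[i];

            switch (op->opc) {
            case 1:
                if (op->can_fold) {
                    /* Fully folded to mov/movi: nothing left to clean up. */
                    continue;
                }
                break;          /* fall into the shared cleanup below */
            default:
                break;
            }

            reset_outputs(op);  /* runs for every op not fully folded */
        }
    }

    int main(void)
    {
        Op ops[] = { { 1, true }, { 1, false }, { 2, false } };
        optimize(ops, 3);
        return 0;
    }
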
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
16
--- a/tcg/optimize.c
17
+++ b/tcg/optimize.c
17
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
19
switch (opc) {
19
switch (opc) {
20
CASE_OP_32_64_VEC(mov):
20
CASE_OP_32_64_VEC(mov):
21
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
21
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
22
- break;
22
- break;
23
+ continue;
23
+ continue;
24
24
25
case INDEX_op_dup_vec:
25
case INDEX_op_dup_vec:
26
if (arg_is_const(op->args[1])) {
26
if (arg_is_const(op->args[1])) {
27
tmp = arg_info(op->args[1])->val;
27
tmp = arg_info(op->args[1])->val;
28
tmp = dup_const(TCGOP_VECE(op), tmp);
28
tmp = dup_const(TCGOP_VECE(op), tmp);
29
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
29
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
30
- break;
30
- break;
31
+ continue;
31
+ continue;
32
}
32
}
33
- goto do_default;
33
- goto do_default;
34
+ break;
34
+ break;
35
35
36
case INDEX_op_dup2_vec:
36
case INDEX_op_dup2_vec:
37
assert(TCG_TARGET_REG_BITS == 32);
37
assert(TCG_TARGET_REG_BITS == 32);
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
39
tcg_opt_gen_movi(s, &ctx, op, op->args[0],
39
tcg_opt_gen_movi(s, &ctx, op, op->args[0],
40
deposit64(arg_info(op->args[1])->val, 32, 32,
40
deposit64(arg_info(op->args[1])->val, 32, 32,
41
arg_info(op->args[2])->val));
41
arg_info(op->args[2])->val));
42
- break;
42
- break;
43
+ continue;
43
+ continue;
44
} else if (args_are_copies(op->args[1], op->args[2])) {
44
} else if (args_are_copies(op->args[1], op->args[2])) {
45
op->opc = INDEX_op_dup_vec;
45
op->opc = INDEX_op_dup_vec;
46
TCGOP_VECE(op) = MO_32;
46
TCGOP_VECE(op) = MO_32;
47
nb_iargs = 1;
47
nb_iargs = 1;
48
}
48
}
49
- goto do_default;
49
- goto do_default;
50
+ break;
50
+ break;
51
51
52
CASE_OP_32_64(not):
52
CASE_OP_32_64(not):
53
CASE_OP_32_64(neg):
53
CASE_OP_32_64(neg):
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
55
if (arg_is_const(op->args[1])) {
55
if (arg_is_const(op->args[1])) {
56
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
56
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
57
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
57
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
58
- break;
58
- break;
59
+ continue;
59
+ continue;
60
}
60
}
61
- goto do_default;
61
- goto do_default;
62
+ break;
62
+ break;
63
63
64
CASE_OP_32_64(bswap16):
64
CASE_OP_32_64(bswap16):
65
CASE_OP_32_64(bswap32):
65
CASE_OP_32_64(bswap32):
66
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
66
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
67
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
67
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
68
op->args[2]);
68
op->args[2]);
69
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
69
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
70
- break;
70
- break;
71
+ continue;
71
+ continue;
72
}
72
}
73
- goto do_default;
73
- goto do_default;
74
+ break;
74
+ break;
75
75
76
CASE_OP_32_64(add):
76
CASE_OP_32_64(add):
77
CASE_OP_32_64(sub):
77
CASE_OP_32_64(sub):
78
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
78
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
79
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
79
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
80
arg_info(op->args[2])->val);
80
arg_info(op->args[2])->val);
81
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
81
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
82
- break;
82
- break;
83
+ continue;
83
+ continue;
84
}
84
}
85
- goto do_default;
85
- goto do_default;
86
+ break;
86
+ break;
87
87
88
CASE_OP_32_64(clz):
88
CASE_OP_32_64(clz):
89
CASE_OP_32_64(ctz):
89
CASE_OP_32_64(ctz):
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
91
} else {
91
} else {
92
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
92
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
93
}
93
}
94
- break;
94
- break;
95
+ continue;
95
+ continue;
96
}
96
}
97
- goto do_default;
97
- goto do_default;
98
+ break;
98
+ break;
99
99
100
CASE_OP_32_64(deposit):
100
CASE_OP_32_64(deposit):
101
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
101
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
102
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
102
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
103
op->args[3], op->args[4],
103
op->args[3], op->args[4],
104
arg_info(op->args[2])->val);
104
arg_info(op->args[2])->val);
105
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
105
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
106
- break;
106
- break;
107
+ continue;
107
+ continue;
108
}
108
}
109
- goto do_default;
109
- goto do_default;
110
+ break;
110
+ break;
111
111
112
CASE_OP_32_64(extract):
112
CASE_OP_32_64(extract):
113
if (arg_is_const(op->args[1])) {
113
if (arg_is_const(op->args[1])) {
114
tmp = extract64(arg_info(op->args[1])->val,
114
tmp = extract64(arg_info(op->args[1])->val,
115
op->args[2], op->args[3]);
115
op->args[2], op->args[3]);
116
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
116
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
117
- break;
117
- break;
118
+ continue;
118
+ continue;
119
}
119
}
120
- goto do_default;
120
- goto do_default;
121
+ break;
121
+ break;
122
122
123
CASE_OP_32_64(sextract):
123
CASE_OP_32_64(sextract):
124
if (arg_is_const(op->args[1])) {
124
if (arg_is_const(op->args[1])) {
125
tmp = sextract64(arg_info(op->args[1])->val,
125
tmp = sextract64(arg_info(op->args[1])->val,
126
op->args[2], op->args[3]);
126
op->args[2], op->args[3]);
127
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
127
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
128
- break;
128
- break;
129
+ continue;
129
+ continue;
130
}
130
}
131
- goto do_default;
131
- goto do_default;
132
+ break;
132
+ break;
133
133
134
CASE_OP_32_64(extract2):
134
CASE_OP_32_64(extract2):
135
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
135
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
136
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
136
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
137
((uint32_t)v2 << (32 - shr)));
137
((uint32_t)v2 << (32 - shr)));
138
}
138
}
139
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
139
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
140
- break;
140
- break;
141
+ continue;
141
+ continue;
142
}
142
}
143
- goto do_default;
143
- goto do_default;
144
+ break;
144
+ break;
145
145
146
CASE_OP_32_64(setcond):
146
CASE_OP_32_64(setcond):
147
tmp = do_constant_folding_cond(opc, op->args[1],
147
tmp = do_constant_folding_cond(opc, op->args[1],
148
op->args[2], op->args[3]);
148
op->args[2], op->args[3]);
149
if (tmp != 2) {
149
if (tmp != 2) {
150
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
150
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
151
- break;
151
- break;
152
+ continue;
152
+ continue;
153
}
153
}
154
- goto do_default;
154
- goto do_default;
155
+ break;
155
+ break;
156
156
157
CASE_OP_32_64(brcond):
157
CASE_OP_32_64(brcond):
158
tmp = do_constant_folding_cond(opc, op->args[0],
158
tmp = do_constant_folding_cond(opc, op->args[0],
159
op->args[1], op->args[2]);
159
op->args[1], op->args[2]);
160
- if (tmp != 2) {
160
- if (tmp != 2) {
161
- if (tmp) {
161
- if (tmp) {
162
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
162
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
163
- op->opc = INDEX_op_br;
163
- op->opc = INDEX_op_br;
164
- op->args[0] = op->args[3];
164
- op->args[0] = op->args[3];
165
- } else {
165
- } else {
166
- tcg_op_remove(s, op);
166
- tcg_op_remove(s, op);
167
- }
167
- }
168
+ switch (tmp) {
168
+ switch (tmp) {
169
+ case 0:
169
+ case 0:
170
+ tcg_op_remove(s, op);
170
+ tcg_op_remove(s, op);
171
+ continue;
171
+ continue;
172
+ case 1:
172
+ case 1:
173
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
173
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
174
+ op->opc = opc = INDEX_op_br;
174
+ op->opc = opc = INDEX_op_br;
175
+ op->args[0] = op->args[3];
175
+ op->args[0] = op->args[3];
176
break;
176
break;
177
}
177
}
178
- goto do_default;
178
- goto do_default;
179
+ break;
179
+ break;
180
180
181
CASE_OP_32_64(movcond):
181
CASE_OP_32_64(movcond):
182
tmp = do_constant_folding_cond(opc, op->args[1],
182
tmp = do_constant_folding_cond(opc, op->args[1],
183
op->args[2], op->args[5]);
183
op->args[2], op->args[5]);
184
if (tmp != 2) {
184
if (tmp != 2) {
185
tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
185
tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
186
- break;
186
- break;
187
+ continue;
187
+ continue;
188
}
188
}
189
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
189
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
190
uint64_t tv = arg_info(op->args[3])->val;
190
uint64_t tv = arg_info(op->args[3])->val;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
if (fv == 1 && tv == 0) {
192
if (fv == 1 && tv == 0) {
193
cond = tcg_invert_cond(cond);
193
cond = tcg_invert_cond(cond);
194
} else if (!(tv == 1 && fv == 0)) {
194
} else if (!(tv == 1 && fv == 0)) {
195
- goto do_default;
195
- goto do_default;
196
+ break;
196
+ break;
197
}
197
}
198
op->args[3] = cond;
198
op->args[3] = cond;
199
op->opc = opc = (opc == INDEX_op_movcond_i32
199
op->opc = opc = (opc == INDEX_op_movcond_i32
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
: INDEX_op_setcond_i64);
201
: INDEX_op_setcond_i64);
202
nb_iargs = 2;
202
nb_iargs = 2;
203
}
203
}
204
- goto do_default;
204
- goto do_default;
205
+ break;
205
+ break;
206
206
207
case INDEX_op_add2_i32:
207
case INDEX_op_add2_i32:
208
case INDEX_op_sub2_i32:
208
case INDEX_op_sub2_i32:
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
rh = op->args[1];
210
rh = op->args[1];
211
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
211
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
212
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
212
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
213
- break;
213
- break;
214
+ continue;
214
+ continue;
215
}
215
}
216
- goto do_default;
216
- goto do_default;
217
+ break;
217
+ break;
218
218
219
case INDEX_op_mulu2_i32:
219
case INDEX_op_mulu2_i32:
220
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
220
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
221
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
221
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
222
rh = op->args[1];
222
rh = op->args[1];
223
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
223
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
224
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
224
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
225
- break;
225
- break;
226
+ continue;
226
+ continue;
227
}
227
}
228
- goto do_default;
228
- goto do_default;
229
+ break;
229
+ break;
230
230
231
case INDEX_op_brcond2_i32:
231
case INDEX_op_brcond2_i32:
232
tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
232
tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
233
op->args[4]);
233
op->args[4]);
234
- if (tmp != 2) {
234
- if (tmp != 2) {
235
- if (tmp) {
235
- if (tmp) {
236
- do_brcond_true:
236
- do_brcond_true:
237
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
237
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
238
- op->opc = INDEX_op_br;
238
- op->opc = INDEX_op_br;
239
- op->args[0] = op->args[5];
239
- op->args[0] = op->args[5];
240
- } else {
240
- } else {
241
+ if (tmp == 0) {
241
+ if (tmp == 0) {
242
do_brcond_false:
242
do_brcond_false:
243
- tcg_op_remove(s, op);
243
- tcg_op_remove(s, op);
244
- }
244
- }
245
- } else if ((op->args[4] == TCG_COND_LT
245
- } else if ((op->args[4] == TCG_COND_LT
246
- || op->args[4] == TCG_COND_GE)
246
- || op->args[4] == TCG_COND_GE)
247
- && arg_is_const(op->args[2])
247
- && arg_is_const(op->args[2])
248
- && arg_info(op->args[2])->val == 0
248
- && arg_info(op->args[2])->val == 0
249
- && arg_is_const(op->args[3])
249
- && arg_is_const(op->args[3])
250
- && arg_info(op->args[3])->val == 0) {
250
- && arg_info(op->args[3])->val == 0) {
251
+ tcg_op_remove(s, op);
251
+ tcg_op_remove(s, op);
252
+ continue;
252
+ continue;
253
+ }
253
+ }
254
+ if (tmp == 1) {
254
+ if (tmp == 1) {
255
+ do_brcond_true:
255
+ do_brcond_true:
256
+ op->opc = opc = INDEX_op_br;
256
+ op->opc = opc = INDEX_op_br;
257
+ op->args[0] = op->args[5];
257
+ op->args[0] = op->args[5];
258
+ break;
258
+ break;
259
+ }
259
+ }
260
+ if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
260
+ if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
261
+ && arg_is_const(op->args[2])
261
+ && arg_is_const(op->args[2])
262
+ && arg_info(op->args[2])->val == 0
262
+ && arg_info(op->args[2])->val == 0
263
+ && arg_is_const(op->args[3])
263
+ && arg_is_const(op->args[3])
264
+ && arg_info(op->args[3])->val == 0) {
264
+ && arg_info(op->args[3])->val == 0) {
265
/* Simplify LT/GE comparisons vs zero to a single compare
265
/* Simplify LT/GE comparisons vs zero to a single compare
266
vs the high word of the input. */
266
vs the high word of the input. */
267
do_brcond_high:
267
do_brcond_high:
268
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
268
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
269
- op->opc = INDEX_op_brcond_i32;
269
- op->opc = INDEX_op_brcond_i32;
270
+ op->opc = opc = INDEX_op_brcond_i32;
270
+ op->opc = opc = INDEX_op_brcond_i32;
271
op->args[0] = op->args[1];
271
op->args[0] = op->args[1];
272
op->args[1] = op->args[3];
272
op->args[1] = op->args[3];
273
op->args[2] = op->args[4];
273
op->args[2] = op->args[4];
274
op->args[3] = op->args[5];
274
op->args[3] = op->args[5];
275
- } else if (op->args[4] == TCG_COND_EQ) {
275
- } else if (op->args[4] == TCG_COND_EQ) {
276
+ break;
276
+ break;
277
+ }
277
+ }
278
+ if (op->args[4] == TCG_COND_EQ) {
278
+ if (op->args[4] == TCG_COND_EQ) {
279
/* Simplify EQ comparisons where one of the pairs
279
/* Simplify EQ comparisons where one of the pairs
280
can be simplified. */
280
can be simplified. */
281
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
281
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
282
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
282
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
283
if (tmp == 0) {
283
if (tmp == 0) {
284
goto do_brcond_false;
284
goto do_brcond_false;
285
} else if (tmp != 1) {
285
} else if (tmp != 1) {
286
- goto do_default;
286
- goto do_default;
287
+ break;
287
+ break;
288
}
288
}
289
do_brcond_low:
289
do_brcond_low:
290
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
290
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
291
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
291
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
292
op->args[1] = op->args[2];
292
op->args[1] = op->args[2];
293
op->args[2] = op->args[4];
293
op->args[2] = op->args[4];
294
op->args[3] = op->args[5];
294
op->args[3] = op->args[5];
295
- } else if (op->args[4] == TCG_COND_NE) {
295
- } else if (op->args[4] == TCG_COND_NE) {
296
+ break;
296
+ break;
297
+ }
297
+ }
298
+ if (op->args[4] == TCG_COND_NE) {
298
+ if (op->args[4] == TCG_COND_NE) {
299
/* Simplify NE comparisons where one of the pairs
299
/* Simplify NE comparisons where one of the pairs
300
can be simplified. */
300
can be simplified. */
301
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
301
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
302
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
302
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
303
} else if (tmp == 1) {
303
} else if (tmp == 1) {
304
goto do_brcond_true;
304
goto do_brcond_true;
305
}
305
}
306
- goto do_default;
306
- goto do_default;
307
- } else {
307
- } else {
308
- goto do_default;
308
- goto do_default;
309
}
309
}
310
break;
310
break;
311
311
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
if (tmp != 2) {
313
if (tmp != 2) {
314
do_setcond_const:
314
do_setcond_const:
315
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
315
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
316
- } else if ((op->args[5] == TCG_COND_LT
316
- } else if ((op->args[5] == TCG_COND_LT
317
- || op->args[5] == TCG_COND_GE)
317
- || op->args[5] == TCG_COND_GE)
318
- && arg_is_const(op->args[3])
318
- && arg_is_const(op->args[3])
319
- && arg_info(op->args[3])->val == 0
319
- && arg_info(op->args[3])->val == 0
320
- && arg_is_const(op->args[4])
320
- && arg_is_const(op->args[4])
321
- && arg_info(op->args[4])->val == 0) {
321
- && arg_info(op->args[4])->val == 0) {
322
+ continue;
322
+ continue;
323
+ }
323
+ }
324
+ if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
324
+ if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
325
+ && arg_is_const(op->args[3])
325
+ && arg_is_const(op->args[3])
326
+ && arg_info(op->args[3])->val == 0
326
+ && arg_info(op->args[3])->val == 0
327
+ && arg_is_const(op->args[4])
327
+ && arg_is_const(op->args[4])
328
+ && arg_info(op->args[4])->val == 0) {
328
+ && arg_info(op->args[4])->val == 0) {
329
/* Simplify LT/GE comparisons vs zero to a single compare
329
/* Simplify LT/GE comparisons vs zero to a single compare
330
vs the high word of the input. */
330
vs the high word of the input. */
331
do_setcond_high:
331
do_setcond_high:
332
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
332
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
333
op->args[1] = op->args[2];
333
op->args[1] = op->args[2];
334
op->args[2] = op->args[4];
334
op->args[2] = op->args[4];
335
op->args[3] = op->args[5];
335
op->args[3] = op->args[5];
336
- } else if (op->args[5] == TCG_COND_EQ) {
336
- } else if (op->args[5] == TCG_COND_EQ) {
337
+ break;
337
+ break;
338
+ }
338
+ }
339
+ if (op->args[5] == TCG_COND_EQ) {
339
+ if (op->args[5] == TCG_COND_EQ) {
340
/* Simplify EQ comparisons where one of the pairs
340
/* Simplify EQ comparisons where one of the pairs
341
can be simplified. */
341
can be simplified. */
342
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
342
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
343
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
343
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
344
if (tmp == 0) {
344
if (tmp == 0) {
345
goto do_setcond_high;
345
goto do_setcond_high;
346
} else if (tmp != 1) {
346
} else if (tmp != 1) {
347
- goto do_default;
347
- goto do_default;
348
+ break;
348
+ break;
349
}
349
}
350
do_setcond_low:
350
do_setcond_low:
351
reset_temp(op->args[0]);
351
reset_temp(op->args[0]);
352
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
352
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
353
op->opc = INDEX_op_setcond_i32;
353
op->opc = INDEX_op_setcond_i32;
354
op->args[2] = op->args[3];
354
op->args[2] = op->args[3];
355
op->args[3] = op->args[5];
355
op->args[3] = op->args[5];
356
- } else if (op->args[5] == TCG_COND_NE) {
356
- } else if (op->args[5] == TCG_COND_NE) {
357
+ break;
357
+ break;
358
+ }
358
+ }
359
+ if (op->args[5] == TCG_COND_NE) {
359
+ if (op->args[5] == TCG_COND_NE) {
360
/* Simplify NE comparisons where one of the pairs
360
/* Simplify NE comparisons where one of the pairs
361
can be simplified. */
361
can be simplified. */
362
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
362
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
363
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
363
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
364
} else if (tmp == 1) {
364
} else if (tmp == 1) {
365
goto do_setcond_const;
365
goto do_setcond_const;
366
}
366
}
367
- goto do_default;
367
- goto do_default;
368
- } else {
368
- } else {
369
- goto do_default;
369
- goto do_default;
370
}
370
}
371
break;
371
break;
372
372
373
- case INDEX_op_call:
373
- case INDEX_op_call:
374
- if (!(tcg_call_flags(op)
374
- if (!(tcg_call_flags(op)
375
+ default:
375
+ default:
376
+ break;
376
+ break;
377
+ }
377
+ }
378
+
378
+
379
+ /* Some of the folding above can change opc. */
379
+ /* Some of the folding above can change opc. */
380
+ opc = op->opc;
380
+ opc = op->opc;
381
+ def = &tcg_op_defs[opc];
381
+ def = &tcg_op_defs[opc];
382
+ if (def->flags & TCG_OPF_BB_END) {
382
+ if (def->flags & TCG_OPF_BB_END) {
383
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
383
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
384
+ } else {
384
+ } else {
385
+ if (opc == INDEX_op_call &&
385
+ if (opc == INDEX_op_call &&
386
+ !(tcg_call_flags(op)
386
+ !(tcg_call_flags(op)
387
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
387
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
388
for (i = 0; i < nb_globals; i++) {
388
for (i = 0; i < nb_globals; i++) {
389
if (test_bit(i, ctx.temps_used.l)) {
389
if (test_bit(i, ctx.temps_used.l)) {
390
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
390
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
391
}
391
}
392
}
392
}
393
}
393
}
394
- goto do_reset_output;
394
- goto do_reset_output;
395
395
396
- default:
396
- default:
397
- do_default:
397
- do_default:
398
- /* Default case: we know nothing about operation (or were unable
398
- /* Default case: we know nothing about operation (or were unable
399
- to compute the operation result) so no propagation is done.
399
- to compute the operation result) so no propagation is done.
400
- We trash everything if the operation is the end of a basic
400
- We trash everything if the operation is the end of a basic
401
- block, otherwise we only trash the output args. "z_mask" is
401
- block, otherwise we only trash the output args. "z_mask" is
402
- the non-zero bits mask for the first output arg. */
402
- the non-zero bits mask for the first output arg. */
403
- if (def->flags & TCG_OPF_BB_END) {
403
- if (def->flags & TCG_OPF_BB_END) {
404
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
404
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
405
- } else {
405
- } else {
406
- do_reset_output:
406
- do_reset_output:
407
- for (i = 0; i < nb_oargs; i++) {
407
- for (i = 0; i < nb_oargs; i++) {
408
- reset_temp(op->args[i]);
408
- reset_temp(op->args[i]);
409
- /* Save the corresponding known-zero bits mask for the
409
- /* Save the corresponding known-zero bits mask for the
410
- first output argument (only one supported so far). */
410
- first output argument (only one supported so far). */
411
- if (i == 0) {
411
- if (i == 0) {
412
- arg_info(op->args[i])->z_mask = z_mask;
412
- arg_info(op->args[i])->z_mask = z_mask;
413
- }
413
- }
414
+ for (i = 0; i < nb_oargs; i++) {
414
+ for (i = 0; i < nb_oargs; i++) {
415
+ reset_temp(op->args[i]);
415
+ reset_temp(op->args[i]);
416
+ /* Save the corresponding known-zero bits mask for the
416
+ /* Save the corresponding known-zero bits mask for the
417
+ first output argument (only one supported so far). */
417
+ first output argument (only one supported so far). */
418
+ if (i == 0) {
418
+ if (i == 0) {
419
+ arg_info(op->args[i])->z_mask = z_mask;
419
+ arg_info(op->args[i])->z_mask = z_mask;
420
}
420
}
421
}
421
}
422
- break;
422
- break;
423
}
423
}
424
424
425
/* Eliminate duplicate and redundant fence instructions. */
425
/* Eliminate duplicate and redundant fence instructions. */
--
2.25.1

Adjust the interface to take the OptContext parameter instead
of TCGContext or both.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 67 +++++++++++++++++++++++++-------------------------
 1 file changed, 34 insertions(+), 33 deletions(-)

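A minimal sketch of the new calling convention (fold_helper is an invented name; only OptContext and its tcg member mirror the patch): once the context struct carries the TCGContext pointer, helpers take a single parameter and recover the TCGContext only where they actually need it:

    typedef struct TCGContext TCGContext;    /* opaque in this sketch */

    typedef struct OptContext {
        TCGContext *tcg;                     /* back-pointer added by the series */
        /* ... temps_used, etc. ... */
    } OptContext;

    /* Before: static void fold_helper(TCGContext *s, OptContext *ctx, ...); */

    /* After: the OptContext alone is enough. */
    static void fold_helper(OptContext *ctx)
    {
        TCGContext *s = ctx->tcg;            /* fetched on demand, e.g. to pass to tcg_op_remove() */
        (void)s;
    }

    int main(void)
    {
        OptContext ctx = { 0 };
        fold_helper(&ctx);
        return 0;
    }
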
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
16
} TempOptInfo;
16
} TempOptInfo;
17
17
18
typedef struct OptContext {
18
typedef struct OptContext {
19
+ TCGContext *tcg;
19
+ TCGContext *tcg;
20
TCGTempSet temps_used;
20
TCGTempSet temps_used;
21
} OptContext;
21
} OptContext;
22
22
23
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
23
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
24
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
24
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
25
}
25
}
26
26
27
-static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
27
-static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
28
+static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
28
+static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
29
{
29
{
30
TCGTemp *dst_ts = arg_temp(dst);
30
TCGTemp *dst_ts = arg_temp(dst);
31
TCGTemp *src_ts = arg_temp(src);
31
TCGTemp *src_ts = arg_temp(src);
32
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
32
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
33
TCGOpcode new_op;
33
TCGOpcode new_op;
34
34
35
if (ts_are_copies(dst_ts, src_ts)) {
35
if (ts_are_copies(dst_ts, src_ts)) {
36
- tcg_op_remove(s, op);
36
- tcg_op_remove(s, op);
37
+ tcg_op_remove(ctx->tcg, op);
37
+ tcg_op_remove(ctx->tcg, op);
38
return;
38
return;
39
}
39
}
40
40
41
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
41
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
42
}
42
}
43
}
43
}
44
44
45
-static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
45
-static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
46
- TCGOp *op, TCGArg dst, uint64_t val)
46
- TCGOp *op, TCGArg dst, uint64_t val)
47
+static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
47
+static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
48
+ TCGArg dst, uint64_t val)
48
+ TCGArg dst, uint64_t val)
49
{
49
{
50
const TCGOpDef *def = &tcg_op_defs[op->opc];
50
const TCGOpDef *def = &tcg_op_defs[op->opc];
51
TCGType type;
51
TCGType type;
52
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
52
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
53
/* Convert movi to mov with constant temp. */
53
/* Convert movi to mov with constant temp. */
54
tv = tcg_constant_internal(type, val);
54
tv = tcg_constant_internal(type, val);
55
init_ts_info(ctx, tv);
55
init_ts_info(ctx, tv);
56
- tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
56
- tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
57
+ tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
57
+ tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
58
}
58
}
59
59
60
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
60
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
{
62
{
63
int nb_temps, nb_globals, i;
63
int nb_temps, nb_globals, i;
64
TCGOp *op, *op_next, *prev_mb = NULL;
64
TCGOp *op, *op_next, *prev_mb = NULL;
65
- OptContext ctx = {};
65
- OptContext ctx = {};
66
+ OptContext ctx = { .tcg = s };
66
+ OptContext ctx = { .tcg = s };
67
67
68
/* Array VALS has an element for each temp.
68
/* Array VALS has an element for each temp.
69
If this temp holds a constant then its value is kept in VALS' element.
69
If this temp holds a constant then its value is kept in VALS' element.
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
71
CASE_OP_32_64(rotr):
71
CASE_OP_32_64(rotr):
72
if (arg_is_const(op->args[1])
72
if (arg_is_const(op->args[1])
73
&& arg_info(op->args[1])->val == 0) {
73
&& arg_info(op->args[1])->val == 0) {
74
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
74
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
76
continue;
76
continue;
77
}
77
}
78
break;
78
break;
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
if (!arg_is_const(op->args[1])
80
if (!arg_is_const(op->args[1])
81
&& arg_is_const(op->args[2])
81
&& arg_is_const(op->args[2])
82
&& arg_info(op->args[2])->val == 0) {
82
&& arg_info(op->args[2])->val == 0) {
83
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
83
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
84
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
84
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
85
continue;
85
continue;
86
}
86
}
87
break;
87
break;
88
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
88
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
89
if (!arg_is_const(op->args[1])
89
if (!arg_is_const(op->args[1])
90
&& arg_is_const(op->args[2])
90
&& arg_is_const(op->args[2])
91
&& arg_info(op->args[2])->val == -1) {
91
&& arg_info(op->args[2])->val == -1) {
92
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
92
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
93
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
93
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
94
continue;
94
continue;
95
}
95
}
96
break;
96
break;
97
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
97
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
98
98
99
if (partmask == 0) {
99
if (partmask == 0) {
100
tcg_debug_assert(nb_oargs == 1);
100
tcg_debug_assert(nb_oargs == 1);
101
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
101
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
102
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
102
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
103
continue;
103
continue;
104
}
104
}
105
if (affected == 0) {
105
if (affected == 0) {
106
tcg_debug_assert(nb_oargs == 1);
106
tcg_debug_assert(nb_oargs == 1);
107
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
107
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
108
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
108
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
109
continue;
109
continue;
110
}
110
}
111
111
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
CASE_OP_32_64(mulsh):
113
CASE_OP_32_64(mulsh):
114
if (arg_is_const(op->args[2])
114
if (arg_is_const(op->args[2])
115
&& arg_info(op->args[2])->val == 0) {
115
&& arg_info(op->args[2])->val == 0) {
116
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
116
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
117
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
117
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
118
continue;
118
continue;
119
}
119
}
120
break;
120
break;
121
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
121
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
122
CASE_OP_32_64_VEC(or):
122
CASE_OP_32_64_VEC(or):
123
CASE_OP_32_64_VEC(and):
123
CASE_OP_32_64_VEC(and):
124
if (args_are_copies(op->args[1], op->args[2])) {
124
if (args_are_copies(op->args[1], op->args[2])) {
125
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
125
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
126
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
126
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
127
continue;
127
continue;
128
}
128
}
129
break;
129
break;
130
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
130
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
131
CASE_OP_32_64_VEC(sub):
131
CASE_OP_32_64_VEC(sub):
132
CASE_OP_32_64_VEC(xor):
132
CASE_OP_32_64_VEC(xor):
133
if (args_are_copies(op->args[1], op->args[2])) {
133
if (args_are_copies(op->args[1], op->args[2])) {
134
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
134
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
135
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
135
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
136
continue;
136
continue;
137
}
137
}
138
break;
138
break;
139
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
139
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
140
allocator where needed and possible. Also detect copies. */
140
allocator where needed and possible. Also detect copies. */
141
switch (opc) {
141
switch (opc) {
142
CASE_OP_32_64_VEC(mov):
142
CASE_OP_32_64_VEC(mov):
143
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
143
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
144
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
144
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
145
continue;
145
continue;
146
146
147
case INDEX_op_dup_vec:
147
case INDEX_op_dup_vec:
148
if (arg_is_const(op->args[1])) {
148
if (arg_is_const(op->args[1])) {
149
tmp = arg_info(op->args[1])->val;
149
tmp = arg_info(op->args[1])->val;
150
tmp = dup_const(TCGOP_VECE(op), tmp);
150
tmp = dup_const(TCGOP_VECE(op), tmp);
151
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
151
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
152
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
152
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
153
continue;
153
continue;
154
}
154
}
155
break;
155
break;
156
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
156
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
157
case INDEX_op_dup2_vec:
157
case INDEX_op_dup2_vec:
158
assert(TCG_TARGET_REG_BITS == 32);
158
assert(TCG_TARGET_REG_BITS == 32);
159
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
159
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
160
- tcg_opt_gen_movi(s, &ctx, op, op->args[0],
160
- tcg_opt_gen_movi(s, &ctx, op, op->args[0],
161
+ tcg_opt_gen_movi(&ctx, op, op->args[0],
161
+ tcg_opt_gen_movi(&ctx, op, op->args[0],
162
deposit64(arg_info(op->args[1])->val, 32, 32,
162
deposit64(arg_info(op->args[1])->val, 32, 32,
163
arg_info(op->args[2])->val));
163
arg_info(op->args[2])->val));
164
continue;
164
continue;
165
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
165
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
166
case INDEX_op_extrh_i64_i32:
166
case INDEX_op_extrh_i64_i32:
167
if (arg_is_const(op->args[1])) {
167
if (arg_is_const(op->args[1])) {
168
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
168
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
169
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
169
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
170
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
171
continue;
171
continue;
172
}
172
}
173
break;
173
break;
174
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
174
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
175
if (arg_is_const(op->args[1])) {
175
if (arg_is_const(op->args[1])) {
176
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
176
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
177
op->args[2]);
177
op->args[2]);
178
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
178
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
179
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
180
continue;
180
continue;
181
}
181
}
182
break;
182
break;
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
184
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
185
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
185
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
186
arg_info(op->args[2])->val);
186
arg_info(op->args[2])->val);
187
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
187
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
188
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
189
continue;
189
continue;
190
}
190
}
191
break;
191
break;
192
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
193
TCGArg v = arg_info(op->args[1])->val;
193
TCGArg v = arg_info(op->args[1])->val;
194
if (v != 0) {
194
if (v != 0) {
195
tmp = do_constant_folding(opc, v, 0);
195
tmp = do_constant_folding(opc, v, 0);
196
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
196
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
197
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
198
} else {
198
} else {
199
- tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
199
- tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
200
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
200
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
201
}
201
}
202
continue;
202
continue;
203
}
203
}
204
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
204
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
205
tmp = deposit64(arg_info(op->args[1])->val,
205
tmp = deposit64(arg_info(op->args[1])->val,
206
op->args[3], op->args[4],
206
op->args[3], op->args[4],
207
arg_info(op->args[2])->val);
207
arg_info(op->args[2])->val);
208
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
208
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
209
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
209
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
210
continue;
210
continue;
211
}
211
}
212
break;
212
break;
213
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
213
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
214
if (arg_is_const(op->args[1])) {
214
if (arg_is_const(op->args[1])) {
215
tmp = extract64(arg_info(op->args[1])->val,
215
tmp = extract64(arg_info(op->args[1])->val,
216
op->args[2], op->args[3]);
216
op->args[2], op->args[3]);
217
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
217
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
218
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
218
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
219
continue;
219
continue;
220
}
220
}
221
break;
221
break;
222
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
222
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
223
if (arg_is_const(op->args[1])) {
223
if (arg_is_const(op->args[1])) {
224
tmp = sextract64(arg_info(op->args[1])->val,
224
tmp = sextract64(arg_info(op->args[1])->val,
225
op->args[2], op->args[3]);
225
op->args[2], op->args[3]);
226
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
226
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
227
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
227
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
228
continue;
228
continue;
229
}
229
}
230
break;
230
break;
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
tmp = (int32_t)(((uint32_t)v1 >> shr) |
232
tmp = (int32_t)(((uint32_t)v1 >> shr) |
233
((uint32_t)v2 << (32 - shr)));
233
((uint32_t)v2 << (32 - shr)));
234
}
234
}
235
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
235
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
236
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
236
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
237
continue;
237
continue;
238
}
238
}
239
break;
239
break;
240
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
240
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
241
tmp = do_constant_folding_cond(opc, op->args[1],
241
tmp = do_constant_folding_cond(opc, op->args[1],
242
op->args[2], op->args[3]);
242
op->args[2], op->args[3]);
243
if (tmp != 2) {
243
if (tmp != 2) {
244
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
244
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
245
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
245
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
246
continue;
246
continue;
247
}
247
}
248
break;
248
break;
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
250
tmp = do_constant_folding_cond(opc, op->args[1],
250
tmp = do_constant_folding_cond(opc, op->args[1],
251
op->args[2], op->args[5]);
251
op->args[2], op->args[5]);
252
if (tmp != 2) {
252
if (tmp != 2) {
253
- tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
253
- tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
254
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
254
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
255
continue;
255
continue;
256
}
256
}
257
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
257
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
258
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
258
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
259
259
260
rl = op->args[0];
260
rl = op->args[0];
261
rh = op->args[1];
261
rh = op->args[1];
262
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
262
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
263
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
263
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
264
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
264
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
265
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
265
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
266
continue;
266
continue;
267
}
267
}
268
break;
268
break;
269
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
269
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
270
270
271
rl = op->args[0];
271
rl = op->args[0];
272
rh = op->args[1];
272
rh = op->args[1];
273
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
273
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
274
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
274
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
275
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
275
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
276
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
276
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
277
continue;
277
continue;
278
}
278
}
279
break;
279
break;
280
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
280
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
281
op->args[5]);
281
op->args[5]);
282
if (tmp != 2) {
282
if (tmp != 2) {
283
do_setcond_const:
283
do_setcond_const:
284
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
284
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
285
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
285
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
286
continue;
286
continue;
287
}
287
}
288
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
288
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
--
2.25.1

This will expose the variable to subroutines that
will be broken out of tcg_optimize.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

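A small sketch of why the variable has to move (names other than prev_mb are invented): once the loop body is split into helper functions, per-iteration local state is no longer visible to them, so it is hoisted into the context structure every helper already receives:

    #include <stddef.h>

    typedef struct Op Op;                /* opaque stand-in for TCGOp */

    typedef struct Ctx {
        Op *prev_mb;                     /* last memory barrier seen, or NULL */
    } Ctx;

    /* Helpers split out of the main loop can now see and update the state. */
    static void note_barrier(Ctx *ctx, Op *op)
    {
        ctx->prev_mb = op;
    }

    static void stop_barrier_merging(Ctx *ctx)
    {
        ctx->prev_mb = NULL;             /* e.g. after an op that touches guest memory */
    }

    int main(void)
    {
        Ctx ctx = { NULL };
        note_barrier(&ctx, NULL);
        stop_barrier_merging(&ctx);
        return 0;
    }
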
diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
 
 typedef struct OptContext {
     TCGContext *tcg;
+    TCGOp *prev_mb;
     TCGTempSet temps_used;
 } OptContext;
 
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
 void tcg_optimize(TCGContext *s)
 {
     int nb_temps, nb_globals, i;
-    TCGOp *op, *op_next, *prev_mb = NULL;
+    TCGOp *op, *op_next;
     OptContext ctx = { .tcg = s };
 
     /* Array VALS has an element for each temp.
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         }
 
         /* Eliminate duplicate and redundant fence instructions. */
-        if (prev_mb) {
+        if (ctx.prev_mb) {
             switch (opc) {
             case INDEX_op_mb:
                 /* Merge two barriers of the same type into one,
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
                  * barrier. This is stricter than specified but for
                  * the purposes of TCG is better than not optimizing.
                  */
-                prev_mb->args[0] |= op->args[0];
+                ctx.prev_mb->args[0] |= op->args[0];
                 tcg_op_remove(s, op);
                 break;
 
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
             case INDEX_op_qemu_st_i64:
             case INDEX_op_call:
                 /* Opcodes that touch guest memory stop the optimization. */
-                prev_mb = NULL;
+                ctx.prev_mb = NULL;
                 break;
             }
         } else if (opc == INDEX_op_mb) {
-            prev_mb = op;
+            ctx.prev_mb = op;
         }
     }
 }
--
2.25.1

There was no real reason for calls to have separate code here.
Unify init for calls vs non-calls using the call path, which
handles TCG_CALL_DUMMY_ARG.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

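A sketch of the unified path (the types below are placeholders, not the QEMU definitions): one loop initializes per-argument info for calls and ordinary ops alike, skipping NULL entries the way the real code skips arguments whose temp is NULL, which is how dummy call arguments (TCG_CALL_DUMMY_ARG) show up:

    #include <stdio.h>

    /* Placeholder types standing in for TCGTemp/TCGOp. */
    typedef struct Temp { int id; } Temp;

    typedef struct Op {
        Temp *args[8];                   /* a NULL entry models a dummy argument */
    } Op;

    static void init_temp_info(Temp *ts)
    {
        printf("init temp %d\n", ts->id);
    }

    /* One path for calls and non-calls: NULL args are skipped. */
    static void init_arguments(Op *op, int nb_args)
    {
        for (int i = 0; i < nb_args; i++) {
            Temp *ts = op->args[i];
            if (ts) {
                init_temp_info(ts);
            }
        }
    }

    int main(void)
    {
        Temp a = { 0 }, b = { 1 };
        Op call = { { &a, NULL, &b } };  /* middle slot is a dummy argument */
        init_arguments(&call, 3);
        return 0;
    }
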
diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
     }
 }
 
-static void init_arg_info(OptContext *ctx, TCGArg arg)
-{
-    init_ts_info(ctx, arg_temp(arg));
-}
-
 static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
 {
     TCGTemp *i, *g, *l;
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
     return false;
 }
 
+static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
+{
+    for (int i = 0; i < nb_args; i++) {
+        TCGTemp *ts = arg_temp(op->args[i]);
+        if (ts) {
+            init_ts_info(ctx, ts);
+        }
+    }
+}
+
 /* Propagate constants and copies, fold constant expressions. */
 void tcg_optimize(TCGContext *s)
 {
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
         if (opc == INDEX_op_call) {
             nb_oargs = TCGOP_CALLO(op);
             nb_iargs = TCGOP_CALLI(op);
-            for (i = 0; i < nb_oargs + nb_iargs; i++) {
-                TCGTemp *ts = arg_temp(op->args[i]);
-                if (ts) {
-                    init_ts_info(&ctx, ts);
-                }
-            }
         } else {
             nb_oargs = def->nb_oargs;
             nb_iargs = def->nb_iargs;
-            for (i = 0; i < nb_oargs + nb_iargs; i++) {
-                init_arg_info(&ctx, op->args[i]);
-            }
         }
+        init_arguments(&ctx, op, nb_oargs + nb_iargs);
 
         /* Do copy propagation */
         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
--
2.25.1

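Reassembled from the hunk above, the new helper reads as below. The pointer check is what makes the unified path safe for calls: a TCG_CALL_DUMMY_ARG slot yields a NULL temp from arg_temp(), so it is simply skipped (the comment is an added gloss, not part of the patch):

    static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
    {
        for (int i = 0; i < nb_args; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            /* NULL here corresponds to TCG_CALL_DUMMY_ARG: nothing to init. */
            if (ts) {
                init_ts_info(ctx, ts);
            }
        }
    }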
1
Continue splitting tcg_optimize.
1
Continue splitting tcg_optimize.
2
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 22 ++++++++++++++--------
8
tcg/optimize.c | 22 ++++++++++++++--------
9
1 file changed, 14 insertions(+), 8 deletions(-)
9
1 file changed, 14 insertions(+), 8 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
15
@@ -XXX,XX +XXX,XX @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
16
}
16
}
17
}
17
}
18
18
19
+static void copy_propagate(OptContext *ctx, TCGOp *op,
19
+static void copy_propagate(OptContext *ctx, TCGOp *op,
20
+ int nb_oargs, int nb_iargs)
20
+ int nb_oargs, int nb_iargs)
21
+{
21
+{
22
+ TCGContext *s = ctx->tcg;
22
+ TCGContext *s = ctx->tcg;
23
+
23
+
24
+ for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
24
+ for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
25
+ TCGTemp *ts = arg_temp(op->args[i]);
25
+ TCGTemp *ts = arg_temp(op->args[i]);
26
+ if (ts && ts_is_copy(ts)) {
26
+ if (ts && ts_is_copy(ts)) {
27
+ op->args[i] = temp_arg(find_better_copy(s, ts));
27
+ op->args[i] = temp_arg(find_better_copy(s, ts));
28
+ }
28
+ }
29
+ }
29
+ }
30
+}
30
+}
31
+
31
+
32
/* Propagate constants and copies, fold constant expressions. */
32
/* Propagate constants and copies, fold constant expressions. */
33
void tcg_optimize(TCGContext *s)
33
void tcg_optimize(TCGContext *s)
34
{
34
{
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
36
nb_iargs = def->nb_iargs;
36
nb_iargs = def->nb_iargs;
37
}
37
}
38
init_arguments(&ctx, op, nb_oargs + nb_iargs);
38
init_arguments(&ctx, op, nb_oargs + nb_iargs);
39
-
39
-
40
- /* Do copy propagation */
40
- /* Do copy propagation */
41
- for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
41
- for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
42
- TCGTemp *ts = arg_temp(op->args[i]);
42
- TCGTemp *ts = arg_temp(op->args[i]);
43
- if (ts && ts_is_copy(ts)) {
43
- if (ts && ts_is_copy(ts)) {
44
- op->args[i] = temp_arg(find_better_copy(s, ts));
44
- op->args[i] = temp_arg(find_better_copy(s, ts));
45
- }
45
- }
46
- }
46
- }
47
+ copy_propagate(&ctx, op, nb_oargs, nb_iargs);
47
+ copy_propagate(&ctx, op, nb_oargs, nb_iargs);
48
48
49
/* For commutative operations make constant second argument */
49
/* For commutative operations make constant second argument */
50
switch (opc) {
50
switch (opc) {
51
--
51
--
52
2.25.1
52
2.25.1
53
53
54
54
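With both helpers in place, the per-op prologue of tcg_optimize() reduces to the two calls shown in the hunk above. Note that copy_propagate() only visits the input operands (indices nb_oargs .. nb_oargs + nb_iargs - 1): the outputs are about to be redefined by this op, so there is nothing to propagate into them.

    init_arguments(&ctx, op, nb_oargs + nb_iargs);
    copy_propagate(&ctx, op, nb_oargs, nb_iargs);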
1
Calls are special in that they have a variable number
1
Calls are special in that they have a variable number
2
of arguments, and need to be able to clobber globals.
2
of arguments, and need to be able to clobber globals.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 63 ++++++++++++++++++++++++++++++++------------------
8
tcg/optimize.c | 63 ++++++++++++++++++++++++++++++++------------------
9
1 file changed, 41 insertions(+), 22 deletions(-)
9
1 file changed, 41 insertions(+), 22 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
15
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
16
}
16
}
17
}
17
}
18
18
19
+static bool fold_call(OptContext *ctx, TCGOp *op)
19
+static bool fold_call(OptContext *ctx, TCGOp *op)
20
+{
20
+{
21
+ TCGContext *s = ctx->tcg;
21
+ TCGContext *s = ctx->tcg;
22
+ int nb_oargs = TCGOP_CALLO(op);
22
+ int nb_oargs = TCGOP_CALLO(op);
23
+ int nb_iargs = TCGOP_CALLI(op);
23
+ int nb_iargs = TCGOP_CALLI(op);
24
+ int flags, i;
24
+ int flags, i;
25
+
25
+
26
+ init_arguments(ctx, op, nb_oargs + nb_iargs);
26
+ init_arguments(ctx, op, nb_oargs + nb_iargs);
27
+ copy_propagate(ctx, op, nb_oargs, nb_iargs);
27
+ copy_propagate(ctx, op, nb_oargs, nb_iargs);
28
+
28
+
29
+ /* If the function reads or writes globals, reset temp data. */
29
+ /* If the function reads or writes globals, reset temp data. */
30
+ flags = tcg_call_flags(op);
30
+ flags = tcg_call_flags(op);
31
+ if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
31
+ if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
32
+ int nb_globals = s->nb_globals;
32
+ int nb_globals = s->nb_globals;
33
+
33
+
34
+ for (i = 0; i < nb_globals; i++) {
34
+ for (i = 0; i < nb_globals; i++) {
35
+ if (test_bit(i, ctx->temps_used.l)) {
35
+ if (test_bit(i, ctx->temps_used.l)) {
36
+ reset_ts(&ctx->tcg->temps[i]);
36
+ reset_ts(&ctx->tcg->temps[i]);
37
+ }
37
+ }
38
+ }
38
+ }
39
+ }
39
+ }
40
+
40
+
41
+ /* Reset temp data for outputs. */
41
+ /* Reset temp data for outputs. */
42
+ for (i = 0; i < nb_oargs; i++) {
42
+ for (i = 0; i < nb_oargs; i++) {
43
+ reset_temp(op->args[i]);
43
+ reset_temp(op->args[i]);
44
+ }
44
+ }
45
+
45
+
46
+ /* Stop optimizing MB across calls. */
46
+ /* Stop optimizing MB across calls. */
47
+ ctx->prev_mb = NULL;
47
+ ctx->prev_mb = NULL;
48
+ return true;
48
+ return true;
49
+}
49
+}
50
+
50
+
51
/* Propagate constants and copies, fold constant expressions. */
51
/* Propagate constants and copies, fold constant expressions. */
52
void tcg_optimize(TCGContext *s)
52
void tcg_optimize(TCGContext *s)
53
{
53
{
54
- int nb_temps, nb_globals, i;
54
- int nb_temps, nb_globals, i;
55
+ int nb_temps, i;
55
+ int nb_temps, i;
56
TCGOp *op, *op_next;
56
TCGOp *op, *op_next;
57
OptContext ctx = { .tcg = s };
57
OptContext ctx = { .tcg = s };
58
58
59
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
59
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
60
available through the doubly linked circular list. */
60
available through the doubly linked circular list. */
61
61
62
nb_temps = s->nb_temps;
62
nb_temps = s->nb_temps;
63
- nb_globals = s->nb_globals;
63
- nb_globals = s->nb_globals;
64
-
64
-
65
for (i = 0; i < nb_temps; ++i) {
65
for (i = 0; i < nb_temps; ++i) {
66
s->temps[i].state_ptr = NULL;
66
s->temps[i].state_ptr = NULL;
67
}
67
}
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
uint64_t z_mask, partmask, affected, tmp;
69
uint64_t z_mask, partmask, affected, tmp;
70
int nb_oargs, nb_iargs;
70
int nb_oargs, nb_iargs;
71
TCGOpcode opc = op->opc;
71
TCGOpcode opc = op->opc;
72
- const TCGOpDef *def = &tcg_op_defs[opc];
72
- const TCGOpDef *def = &tcg_op_defs[opc];
73
+ const TCGOpDef *def;
73
+ const TCGOpDef *def;
74
74
75
- /* Count the arguments, and initialize the temps that are
75
- /* Count the arguments, and initialize the temps that are
76
- going to be used */
76
- going to be used */
77
+ /* Calls are special. */
77
+ /* Calls are special. */
78
if (opc == INDEX_op_call) {
78
if (opc == INDEX_op_call) {
79
- nb_oargs = TCGOP_CALLO(op);
79
- nb_oargs = TCGOP_CALLO(op);
80
- nb_iargs = TCGOP_CALLI(op);
80
- nb_iargs = TCGOP_CALLI(op);
81
- } else {
81
- } else {
82
- nb_oargs = def->nb_oargs;
82
- nb_oargs = def->nb_oargs;
83
- nb_iargs = def->nb_iargs;
83
- nb_iargs = def->nb_iargs;
84
+ fold_call(&ctx, op);
84
+ fold_call(&ctx, op);
85
+ continue;
85
+ continue;
86
}
86
}
87
+
87
+
88
+ def = &tcg_op_defs[opc];
88
+ def = &tcg_op_defs[opc];
89
+ nb_oargs = def->nb_oargs;
89
+ nb_oargs = def->nb_oargs;
90
+ nb_iargs = def->nb_iargs;
90
+ nb_iargs = def->nb_iargs;
91
init_arguments(&ctx, op, nb_oargs + nb_iargs);
91
init_arguments(&ctx, op, nb_oargs + nb_iargs);
92
copy_propagate(&ctx, op, nb_oargs, nb_iargs);
92
copy_propagate(&ctx, op, nb_oargs, nb_iargs);
93
93
94
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
94
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
95
if (def->flags & TCG_OPF_BB_END) {
95
if (def->flags & TCG_OPF_BB_END) {
96
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
96
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
97
} else {
97
} else {
98
- if (opc == INDEX_op_call &&
98
- if (opc == INDEX_op_call &&
99
- !(tcg_call_flags(op)
99
- !(tcg_call_flags(op)
100
- & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
100
- & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
101
- for (i = 0; i < nb_globals; i++) {
101
- for (i = 0; i < nb_globals; i++) {
102
- if (test_bit(i, ctx.temps_used.l)) {
102
- if (test_bit(i, ctx.temps_used.l)) {
103
- reset_ts(&s->temps[i]);
103
- reset_ts(&s->temps[i]);
104
- }
104
- }
105
- }
105
- }
106
- }
106
- }
107
-
107
-
108
for (i = 0; i < nb_oargs; i++) {
108
for (i = 0; i < nb_oargs; i++) {
109
reset_temp(op->args[i]);
109
reset_temp(op->args[i]);
110
/* Save the corresponding known-zero bits mask for the
110
/* Save the corresponding known-zero bits mask for the
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
case INDEX_op_qemu_st_i32:
112
case INDEX_op_qemu_st_i32:
113
case INDEX_op_qemu_st8_i32:
113
case INDEX_op_qemu_st8_i32:
114
case INDEX_op_qemu_st_i64:
114
case INDEX_op_qemu_st_i64:
115
- case INDEX_op_call:
115
- case INDEX_op_call:
116
/* Opcodes that touch guest memory stop the optimization. */
116
/* Opcodes that touch guest memory stop the optimization. */
117
ctx.prev_mb = NULL;
117
ctx.prev_mb = NULL;
118
break;
118
break;
119
--
119
--
120
2.25.1
120
2.25.1
121
121
122
122
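The least obvious part of fold_call() is the flags test: unless the callee is flagged with TCG_CALL_NO_READ_GLOBALS or TCG_CALL_NO_WRITE_GLOBALS, everything tracked about global temps is thrown away, since the helper may have changed them behind the optimizer's back. That path, lifted out of the hunk above with explanatory comments added:

    flags = tcg_call_flags(op);
    if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
        int nb_globals = s->nb_globals;

        /* The helper may read or write guest globals: forget their tracked
           constants and copies so later folding cannot use stale data. */
        for (i = 0; i < nb_globals; i++) {
            if (test_bit(i, ctx->temps_used.l)) {
                reset_ts(&ctx->tcg->temps[i]);
            }
        }
    }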
1
Rather than try to keep these up-to-date across folding,
1
Rather than try to keep these up-to-date across folding,
2
re-read nb_oargs at the end, after re-reading the opcode.
2
re-read nb_oargs at the end, after re-reading the opcode.
3
3
4
A couple of asserts need dropping, but that will take care
4
A couple of asserts need dropping, but that will take care
5
of itself as we split the function further.
5
of itself as we split the function further.
6
6
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
8
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
10
---
11
tcg/optimize.c | 14 ++++----------
11
tcg/optimize.c | 14 ++++----------
12
1 file changed, 4 insertions(+), 10 deletions(-)
12
1 file changed, 4 insertions(+), 10 deletions(-)
13
13
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
16
--- a/tcg/optimize.c
17
+++ b/tcg/optimize.c
17
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
19
19
20
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
20
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
21
uint64_t z_mask, partmask, affected, tmp;
21
uint64_t z_mask, partmask, affected, tmp;
22
- int nb_oargs, nb_iargs;
22
- int nb_oargs, nb_iargs;
23
TCGOpcode opc = op->opc;
23
TCGOpcode opc = op->opc;
24
const TCGOpDef *def;
24
const TCGOpDef *def;
25
25
26
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
26
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
27
}
27
}
28
28
29
def = &tcg_op_defs[opc];
29
def = &tcg_op_defs[opc];
30
- nb_oargs = def->nb_oargs;
30
- nb_oargs = def->nb_oargs;
31
- nb_iargs = def->nb_iargs;
31
- nb_iargs = def->nb_iargs;
32
- init_arguments(&ctx, op, nb_oargs + nb_iargs);
32
- init_arguments(&ctx, op, nb_oargs + nb_iargs);
33
- copy_propagate(&ctx, op, nb_oargs, nb_iargs);
33
- copy_propagate(&ctx, op, nb_oargs, nb_iargs);
34
+ init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
34
+ init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
35
+ copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
35
+ copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
36
36
37
/* For commutative operations make constant second argument */
37
/* For commutative operations make constant second argument */
38
switch (opc) {
38
switch (opc) {
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
40
40
41
CASE_OP_32_64(qemu_ld):
41
CASE_OP_32_64(qemu_ld):
42
{
42
{
43
- MemOpIdx oi = op->args[nb_oargs + nb_iargs];
43
- MemOpIdx oi = op->args[nb_oargs + nb_iargs];
44
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
44
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
45
MemOp mop = get_memop(oi);
45
MemOp mop = get_memop(oi);
46
if (!(mop & MO_SIGN)) {
46
if (!(mop & MO_SIGN)) {
47
z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
47
z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
}
49
}
50
50
51
if (partmask == 0) {
51
if (partmask == 0) {
52
- tcg_debug_assert(nb_oargs == 1);
52
- tcg_debug_assert(nb_oargs == 1);
53
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
53
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
54
continue;
54
continue;
55
}
55
}
56
if (affected == 0) {
56
if (affected == 0) {
57
- tcg_debug_assert(nb_oargs == 1);
57
- tcg_debug_assert(nb_oargs == 1);
58
tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
58
tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
59
continue;
59
continue;
60
}
60
}
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
} else if (args_are_copies(op->args[1], op->args[2])) {
62
} else if (args_are_copies(op->args[1], op->args[2])) {
63
op->opc = INDEX_op_dup_vec;
63
op->opc = INDEX_op_dup_vec;
64
TCGOP_VECE(op) = MO_32;
64
TCGOP_VECE(op) = MO_32;
65
- nb_iargs = 1;
65
- nb_iargs = 1;
66
}
66
}
67
break;
67
break;
68
68
69
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
70
op->opc = opc = (opc == INDEX_op_movcond_i32
70
op->opc = opc = (opc == INDEX_op_movcond_i32
71
? INDEX_op_setcond_i32
71
? INDEX_op_setcond_i32
72
: INDEX_op_setcond_i64);
72
: INDEX_op_setcond_i64);
73
- nb_iargs = 2;
73
- nb_iargs = 2;
74
}
74
}
75
break;
75
break;
76
76
77
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
77
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
78
if (def->flags & TCG_OPF_BB_END) {
78
if (def->flags & TCG_OPF_BB_END) {
79
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
79
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
80
} else {
80
} else {
81
+ int nb_oargs = def->nb_oargs;
81
+ int nb_oargs = def->nb_oargs;
82
for (i = 0; i < nb_oargs; i++) {
82
for (i = 0; i < nb_oargs; i++) {
83
reset_temp(op->args[i]);
83
reset_temp(op->args[i]);
84
/* Save the corresponding known-zero bits mask for the
84
/* Save the corresponding known-zero bits mask for the
85
--
85
--
86
2.25.1
86
2.25.1
87
87
88
88
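As a concrete reading of the qemu_ld case visible above: z_mask is the mask of result bits that may still be nonzero (a clear bit means "known zero"). Working the formula for an unsigned load:

    /* mop & MO_SIZE == 0 (8-bit load):
     *   z_mask = (2ULL << ((8 << 0) - 1)) - 1 = (2ULL << 7) - 1 = 0xff
     * mop & MO_SIZE == 1 (16-bit load):
     *   z_mask = (2ULL << ((8 << 1) - 1)) - 1 = (2ULL << 15) - 1 = 0xffff
     * i.e. every bit above the access size is known to be zero, which is
     * the information the known-zero optimizations key off.
     */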
1
Return -1 instead of 2 for failure, so that we can
1
Return -1 instead of 2 for failure, so that we can
2
use comparisons against 0 for all cases.
2
use comparisons against 0 for all cases.
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 145 +++++++++++++++++++++++++------------------------
8
tcg/optimize.c | 145 +++++++++++++++++++++++++------------------------
9
1 file changed, 74 insertions(+), 71 deletions(-)
9
1 file changed, 74 insertions(+), 71 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
15
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
16
}
16
}
17
}
17
}
18
18
19
-/* Return 2 if the condition can't be simplified, and the result
19
-/* Return 2 if the condition can't be simplified, and the result
20
- of the condition (0 or 1) if it can */
20
- of the condition (0 or 1) if it can */
21
-static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
21
-static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
22
- TCGArg y, TCGCond c)
22
- TCGArg y, TCGCond c)
23
+/*
23
+/*
24
+ * Return -1 if the condition can't be simplified,
24
+ * Return -1 if the condition can't be simplified,
25
+ * and the result of the condition (0 or 1) if it can.
25
+ * and the result of the condition (0 or 1) if it can.
26
+ */
26
+ */
27
+static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
27
+static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
28
+ TCGArg y, TCGCond c)
28
+ TCGArg y, TCGCond c)
29
{
29
{
30
uint64_t xv = arg_info(x)->val;
30
uint64_t xv = arg_info(x)->val;
31
uint64_t yv = arg_info(y)->val;
31
uint64_t yv = arg_info(y)->val;
32
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
32
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
33
case TCG_COND_GEU:
33
case TCG_COND_GEU:
34
return 1;
34
return 1;
35
default:
35
default:
36
- return 2;
36
- return 2;
37
+ return -1;
37
+ return -1;
38
}
38
}
39
}
39
}
40
- return 2;
40
- return 2;
41
+ return -1;
41
+ return -1;
42
}
42
}
43
43
44
-/* Return 2 if the condition can't be simplified, and the result
44
-/* Return 2 if the condition can't be simplified, and the result
45
- of the condition (0 or 1) if it can */
45
- of the condition (0 or 1) if it can */
46
-static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
46
-static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
47
+/*
47
+/*
48
+ * Return -1 if the condition can't be simplified,
48
+ * Return -1 if the condition can't be simplified,
49
+ * and the result of the condition (0 or 1) if it can.
49
+ * and the result of the condition (0 or 1) if it can.
50
+ */
50
+ */
51
+static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
51
+static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
52
{
52
{
53
TCGArg al = p1[0], ah = p1[1];
53
TCGArg al = p1[0], ah = p1[1];
54
TCGArg bl = p2[0], bh = p2[1];
54
TCGArg bl = p2[0], bh = p2[1];
55
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
55
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
56
if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
56
if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
57
return do_constant_folding_cond_eq(c);
57
return do_constant_folding_cond_eq(c);
58
}
58
}
59
- return 2;
59
- return 2;
60
+ return -1;
60
+ return -1;
61
}
61
}
62
62
63
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
63
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
65
break;
65
break;
66
66
67
CASE_OP_32_64(setcond):
67
CASE_OP_32_64(setcond):
68
- tmp = do_constant_folding_cond(opc, op->args[1],
68
- tmp = do_constant_folding_cond(opc, op->args[1],
69
- op->args[2], op->args[3]);
69
- op->args[2], op->args[3]);
70
- if (tmp != 2) {
70
- if (tmp != 2) {
71
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
71
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
72
+ i = do_constant_folding_cond(opc, op->args[1],
72
+ i = do_constant_folding_cond(opc, op->args[1],
73
+ op->args[2], op->args[3]);
73
+ op->args[2], op->args[3]);
74
+ if (i >= 0) {
74
+ if (i >= 0) {
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], i);
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], i);
76
continue;
76
continue;
77
}
77
}
78
break;
78
break;
79
79
80
CASE_OP_32_64(brcond):
80
CASE_OP_32_64(brcond):
81
- tmp = do_constant_folding_cond(opc, op->args[0],
81
- tmp = do_constant_folding_cond(opc, op->args[0],
82
- op->args[1], op->args[2]);
82
- op->args[1], op->args[2]);
83
- switch (tmp) {
83
- switch (tmp) {
84
- case 0:
84
- case 0:
85
+ i = do_constant_folding_cond(opc, op->args[0],
85
+ i = do_constant_folding_cond(opc, op->args[0],
86
+ op->args[1], op->args[2]);
86
+ op->args[1], op->args[2]);
87
+ if (i == 0) {
87
+ if (i == 0) {
88
tcg_op_remove(s, op);
88
tcg_op_remove(s, op);
89
continue;
89
continue;
90
- case 1:
90
- case 1:
91
+ } else if (i > 0) {
91
+ } else if (i > 0) {
92
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
92
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
93
op->opc = opc = INDEX_op_br;
93
op->opc = opc = INDEX_op_br;
94
op->args[0] = op->args[3];
94
op->args[0] = op->args[3];
95
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
95
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
96
break;
96
break;
97
97
98
CASE_OP_32_64(movcond):
98
CASE_OP_32_64(movcond):
99
- tmp = do_constant_folding_cond(opc, op->args[1],
99
- tmp = do_constant_folding_cond(opc, op->args[1],
100
- op->args[2], op->args[5]);
100
- op->args[2], op->args[5]);
101
- if (tmp != 2) {
101
- if (tmp != 2) {
102
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
102
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
103
+ i = do_constant_folding_cond(opc, op->args[1],
103
+ i = do_constant_folding_cond(opc, op->args[1],
104
+ op->args[2], op->args[5]);
104
+ op->args[2], op->args[5]);
105
+ if (i >= 0) {
105
+ if (i >= 0) {
106
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
106
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
107
continue;
107
continue;
108
}
108
}
109
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
109
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
break;
111
break;
112
112
113
case INDEX_op_brcond2_i32:
113
case INDEX_op_brcond2_i32:
114
- tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
114
- tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
115
- op->args[4]);
115
- op->args[4]);
116
- if (tmp == 0) {
116
- if (tmp == 0) {
117
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2],
117
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2],
118
+ op->args[4]);
118
+ op->args[4]);
119
+ if (i == 0) {
119
+ if (i == 0) {
120
do_brcond_false:
120
do_brcond_false:
121
tcg_op_remove(s, op);
121
tcg_op_remove(s, op);
122
continue;
122
continue;
123
}
123
}
124
- if (tmp == 1) {
124
- if (tmp == 1) {
125
+ if (i > 0) {
125
+ if (i > 0) {
126
do_brcond_true:
126
do_brcond_true:
127
op->opc = opc = INDEX_op_br;
127
op->opc = opc = INDEX_op_br;
128
op->args[0] = op->args[5];
128
op->args[0] = op->args[5];
129
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
129
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
130
if (op->args[4] == TCG_COND_EQ) {
130
if (op->args[4] == TCG_COND_EQ) {
131
/* Simplify EQ comparisons where one of the pairs
131
/* Simplify EQ comparisons where one of the pairs
132
can be simplified. */
132
can be simplified. */
133
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
133
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
134
- op->args[0], op->args[2],
134
- op->args[0], op->args[2],
135
- TCG_COND_EQ);
135
- TCG_COND_EQ);
136
- if (tmp == 0) {
136
- if (tmp == 0) {
137
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
137
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
138
+ op->args[0], op->args[2],
138
+ op->args[0], op->args[2],
139
+ TCG_COND_EQ);
139
+ TCG_COND_EQ);
140
+ if (i == 0) {
140
+ if (i == 0) {
141
goto do_brcond_false;
141
goto do_brcond_false;
142
- } else if (tmp == 1) {
142
- } else if (tmp == 1) {
143
+ } else if (i > 0) {
143
+ } else if (i > 0) {
144
goto do_brcond_high;
144
goto do_brcond_high;
145
}
145
}
146
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
146
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
147
- op->args[1], op->args[3],
147
- op->args[1], op->args[3],
148
- TCG_COND_EQ);
148
- TCG_COND_EQ);
149
- if (tmp == 0) {
149
- if (tmp == 0) {
150
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
150
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
151
+ op->args[1], op->args[3],
151
+ op->args[1], op->args[3],
152
+ TCG_COND_EQ);
152
+ TCG_COND_EQ);
153
+ if (i == 0) {
153
+ if (i == 0) {
154
goto do_brcond_false;
154
goto do_brcond_false;
155
- } else if (tmp != 1) {
155
- } else if (tmp != 1) {
156
+ } else if (i < 0) {
156
+ } else if (i < 0) {
157
break;
157
break;
158
}
158
}
159
do_brcond_low:
159
do_brcond_low:
160
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
160
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
161
if (op->args[4] == TCG_COND_NE) {
161
if (op->args[4] == TCG_COND_NE) {
162
/* Simplify NE comparisons where one of the pairs
162
/* Simplify NE comparisons where one of the pairs
163
can be simplified. */
163
can be simplified. */
164
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
164
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
165
- op->args[0], op->args[2],
165
- op->args[0], op->args[2],
166
- TCG_COND_NE);
166
- TCG_COND_NE);
167
- if (tmp == 0) {
167
- if (tmp == 0) {
168
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
168
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
169
+ op->args[0], op->args[2],
169
+ op->args[0], op->args[2],
170
+ TCG_COND_NE);
170
+ TCG_COND_NE);
171
+ if (i == 0) {
171
+ if (i == 0) {
172
goto do_brcond_high;
172
goto do_brcond_high;
173
- } else if (tmp == 1) {
173
- } else if (tmp == 1) {
174
+ } else if (i > 0) {
174
+ } else if (i > 0) {
175
goto do_brcond_true;
175
goto do_brcond_true;
176
}
176
}
177
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
177
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
178
- op->args[1], op->args[3],
178
- op->args[1], op->args[3],
179
- TCG_COND_NE);
179
- TCG_COND_NE);
180
- if (tmp == 0) {
180
- if (tmp == 0) {
181
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
181
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
182
+ op->args[1], op->args[3],
182
+ op->args[1], op->args[3],
183
+ TCG_COND_NE);
183
+ TCG_COND_NE);
184
+ if (i == 0) {
184
+ if (i == 0) {
185
goto do_brcond_low;
185
goto do_brcond_low;
186
- } else if (tmp == 1) {
186
- } else if (tmp == 1) {
187
+ } else if (i > 0) {
187
+ } else if (i > 0) {
188
goto do_brcond_true;
188
goto do_brcond_true;
189
}
189
}
190
}
190
}
191
break;
191
break;
192
192
193
case INDEX_op_setcond2_i32:
193
case INDEX_op_setcond2_i32:
194
- tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
194
- tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
195
- op->args[5]);
195
- op->args[5]);
196
- if (tmp != 2) {
196
- if (tmp != 2) {
197
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3],
197
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3],
198
+ op->args[5]);
198
+ op->args[5]);
199
+ if (i >= 0) {
199
+ if (i >= 0) {
200
do_setcond_const:
200
do_setcond_const:
201
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
201
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
202
+ tcg_opt_gen_movi(&ctx, op, op->args[0], i);
202
+ tcg_opt_gen_movi(&ctx, op, op->args[0], i);
203
continue;
203
continue;
204
}
204
}
205
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
205
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
206
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
206
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
207
if (op->args[5] == TCG_COND_EQ) {
207
if (op->args[5] == TCG_COND_EQ) {
208
/* Simplify EQ comparisons where one of the pairs
208
/* Simplify EQ comparisons where one of the pairs
209
can be simplified. */
209
can be simplified. */
210
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
210
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
211
- op->args[1], op->args[3],
211
- op->args[1], op->args[3],
212
- TCG_COND_EQ);
212
- TCG_COND_EQ);
213
- if (tmp == 0) {
213
- if (tmp == 0) {
214
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
214
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
215
+ op->args[1], op->args[3],
215
+ op->args[1], op->args[3],
216
+ TCG_COND_EQ);
216
+ TCG_COND_EQ);
217
+ if (i == 0) {
217
+ if (i == 0) {
218
goto do_setcond_const;
218
goto do_setcond_const;
219
- } else if (tmp == 1) {
219
- } else if (tmp == 1) {
220
+ } else if (i > 0) {
220
+ } else if (i > 0) {
221
goto do_setcond_high;
221
goto do_setcond_high;
222
}
222
}
223
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
223
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
224
- op->args[2], op->args[4],
224
- op->args[2], op->args[4],
225
- TCG_COND_EQ);
225
- TCG_COND_EQ);
226
- if (tmp == 0) {
226
- if (tmp == 0) {
227
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
227
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
228
+ op->args[2], op->args[4],
228
+ op->args[2], op->args[4],
229
+ TCG_COND_EQ);
229
+ TCG_COND_EQ);
230
+ if (i == 0) {
230
+ if (i == 0) {
231
goto do_setcond_high;
231
goto do_setcond_high;
232
- } else if (tmp != 1) {
232
- } else if (tmp != 1) {
233
+ } else if (i < 0) {
233
+ } else if (i < 0) {
234
break;
234
break;
235
}
235
}
236
do_setcond_low:
236
do_setcond_low:
237
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
237
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
238
if (op->args[5] == TCG_COND_NE) {
238
if (op->args[5] == TCG_COND_NE) {
239
/* Simplify NE comparisons where one of the pairs
239
/* Simplify NE comparisons where one of the pairs
240
can be simplified. */
240
can be simplified. */
241
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
241
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
242
- op->args[1], op->args[3],
242
- op->args[1], op->args[3],
243
- TCG_COND_NE);
243
- TCG_COND_NE);
244
- if (tmp == 0) {
244
- if (tmp == 0) {
245
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
245
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
246
+ op->args[1], op->args[3],
246
+ op->args[1], op->args[3],
247
+ TCG_COND_NE);
247
+ TCG_COND_NE);
248
+ if (i == 0) {
248
+ if (i == 0) {
249
goto do_setcond_high;
249
goto do_setcond_high;
250
- } else if (tmp == 1) {
250
- } else if (tmp == 1) {
251
+ } else if (i > 0) {
251
+ } else if (i > 0) {
252
goto do_setcond_const;
252
goto do_setcond_const;
253
}
253
}
254
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
254
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
255
- op->args[2], op->args[4],
255
- op->args[2], op->args[4],
256
- TCG_COND_NE);
256
- TCG_COND_NE);
257
- if (tmp == 0) {
257
- if (tmp == 0) {
258
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
258
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
259
+ op->args[2], op->args[4],
259
+ op->args[2], op->args[4],
260
+ TCG_COND_NE);
260
+ TCG_COND_NE);
261
+ if (i == 0) {
261
+ if (i == 0) {
262
goto do_setcond_low;
262
goto do_setcond_low;
263
- } else if (tmp == 1) {
263
- } else if (tmp == 1) {
264
+ } else if (i > 0) {
264
+ } else if (i > 0) {
265
goto do_setcond_const;
265
goto do_setcond_const;
266
}
266
}
267
}
267
}
268
--
268
--
269
2.25.1
269
2.25.1
270
270
271
271
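With the new -1/0/1 convention the callers read as a natural three-way test. Taking the brcond hunk above as the pattern (the comments are an added gloss):

    i = do_constant_folding_cond(opc, op->args[0], op->args[1], op->args[2]);
    if (i == 0) {
        /* Condition known false: the conditional branch is dead. */
        tcg_op_remove(s, op);
        continue;
    } else if (i > 0) {
        /* Condition known true: rewrite into an unconditional br. */
        memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
        op->opc = opc = INDEX_op_br;
        op->args[0] = op->args[3];
    }
    /* i < 0: not decidable at translate time; leave the op as is. */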
1
This will allow callers to tail call to these functions
1
This will allow callers to tail call to these functions
2
and return true to indicate that processing is complete.
2
and return true to indicate that processing is complete.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
tcg/optimize.c | 9 +++++----
9
tcg/optimize.c | 9 +++++----
10
1 file changed, 5 insertions(+), 4 deletions(-)
10
1 file changed, 5 insertions(+), 4 deletions(-)
11
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
16
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
17
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
17
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
18
}
18
}
19
19
20
-static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
20
-static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
21
+static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
21
+static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
22
{
22
{
23
TCGTemp *dst_ts = arg_temp(dst);
23
TCGTemp *dst_ts = arg_temp(dst);
24
TCGTemp *src_ts = arg_temp(src);
24
TCGTemp *src_ts = arg_temp(src);
25
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
25
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
26
26
27
if (ts_are_copies(dst_ts, src_ts)) {
27
if (ts_are_copies(dst_ts, src_ts)) {
28
tcg_op_remove(ctx->tcg, op);
28
tcg_op_remove(ctx->tcg, op);
29
- return;
29
- return;
30
+ return true;
30
+ return true;
31
}
31
}
32
32
33
reset_ts(dst_ts);
33
reset_ts(dst_ts);
34
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
34
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
35
di->is_const = si->is_const;
35
di->is_const = si->is_const;
36
di->val = si->val;
36
di->val = si->val;
37
}
37
}
38
+ return true;
38
+ return true;
39
}
39
}
40
40
41
-static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
41
-static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
42
+static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
42
+static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
43
TCGArg dst, uint64_t val)
43
TCGArg dst, uint64_t val)
44
{
44
{
45
const TCGOpDef *def = &tcg_op_defs[op->opc];
45
const TCGOpDef *def = &tcg_op_defs[op->opc];
46
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
46
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
47
/* Convert movi to mov with constant temp. */
47
/* Convert movi to mov with constant temp. */
48
tv = tcg_constant_internal(type, val);
48
tv = tcg_constant_internal(type, val);
49
init_ts_info(ctx, tv);
49
init_ts_info(ctx, tv);
50
- tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
50
- tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
51
+ return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
51
+ return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
52
}
52
}
53
53
54
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
54
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
55
--
55
--
56
2.25.1
56
2.25.1
57
57
58
58
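The value of the bool return shows up in the later patches of this pull, where each fold_<op>() helper can finish by tail calling the generator; for example, the fold_const1() helper added at the end of the series is essentially:

    static bool fold_const1(OptContext *ctx, TCGOp *op)
    {
        if (arg_is_const(op->args[1])) {
            uint64_t t = arg_info(op->args[1])->val;

            t = do_constant_folding(op->opc, t, 0);
            /* tcg_opt_gen_movi() now returns true, so "fold to a constant
               and report the op as handled" is a single statement. */
            return tcg_opt_gen_movi(ctx, op, op->args[0], t);
        }
        return false;
    }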
1
Copy z_mask into OptContext, for writeback to the
1
Copy z_mask into OptContext, for writeback to the
2
first output within the new function.
2
first output within the new function.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 49 +++++++++++++++++++++++++++++++++----------------
8
tcg/optimize.c | 49 +++++++++++++++++++++++++++++++++----------------
9
1 file changed, 33 insertions(+), 16 deletions(-)
9
1 file changed, 33 insertions(+), 16 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
15
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
16
TCGContext *tcg;
16
TCGContext *tcg;
17
TCGOp *prev_mb;
17
TCGOp *prev_mb;
18
TCGTempSet temps_used;
18
TCGTempSet temps_used;
19
+
19
+
20
+ /* In flight values from optimization. */
20
+ /* In flight values from optimization. */
21
+ uint64_t z_mask;
21
+ uint64_t z_mask;
22
} OptContext;
22
} OptContext;
23
23
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
25
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
25
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
26
}
26
}
27
}
27
}
28
28
29
+static void finish_folding(OptContext *ctx, TCGOp *op)
29
+static void finish_folding(OptContext *ctx, TCGOp *op)
30
+{
30
+{
31
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
31
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
32
+ int i, nb_oargs;
32
+ int i, nb_oargs;
33
+
33
+
34
+ /*
34
+ /*
35
+ * For an opcode that ends a BB, reset all temp data.
35
+ * For an opcode that ends a BB, reset all temp data.
36
+ * We do no cross-BB optimization.
36
+ * We do no cross-BB optimization.
37
+ */
37
+ */
38
+ if (def->flags & TCG_OPF_BB_END) {
38
+ if (def->flags & TCG_OPF_BB_END) {
39
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
39
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
40
+ ctx->prev_mb = NULL;
40
+ ctx->prev_mb = NULL;
41
+ return;
41
+ return;
42
+ }
42
+ }
43
+
43
+
44
+ nb_oargs = def->nb_oargs;
44
+ nb_oargs = def->nb_oargs;
45
+ for (i = 0; i < nb_oargs; i++) {
45
+ for (i = 0; i < nb_oargs; i++) {
46
+ reset_temp(op->args[i]);
46
+ reset_temp(op->args[i]);
47
+ /*
47
+ /*
48
+ * Save the corresponding known-zero bits mask for the
48
+ * Save the corresponding known-zero bits mask for the
49
+ * first output argument (only one supported so far).
49
+ * first output argument (only one supported so far).
50
+ */
50
+ */
51
+ if (i == 0) {
51
+ if (i == 0) {
52
+ arg_info(op->args[i])->z_mask = ctx->z_mask;
52
+ arg_info(op->args[i])->z_mask = ctx->z_mask;
53
+ }
53
+ }
54
+ }
54
+ }
55
+}
55
+}
56
+
56
+
57
static bool fold_call(OptContext *ctx, TCGOp *op)
57
static bool fold_call(OptContext *ctx, TCGOp *op)
58
{
58
{
59
TCGContext *s = ctx->tcg;
59
TCGContext *s = ctx->tcg;
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
partmask &= 0xffffffffu;
61
partmask &= 0xffffffffu;
62
affected &= 0xffffffffu;
62
affected &= 0xffffffffu;
63
}
63
}
64
+ ctx.z_mask = z_mask;
64
+ ctx.z_mask = z_mask;
65
65
66
if (partmask == 0) {
66
if (partmask == 0) {
67
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
67
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
break;
69
break;
70
}
70
}
71
71
72
- /* Some of the folding above can change opc. */
72
- /* Some of the folding above can change opc. */
73
- opc = op->opc;
73
- opc = op->opc;
74
- def = &tcg_op_defs[opc];
74
- def = &tcg_op_defs[opc];
75
- if (def->flags & TCG_OPF_BB_END) {
75
- if (def->flags & TCG_OPF_BB_END) {
76
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
76
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
77
- } else {
77
- } else {
78
- int nb_oargs = def->nb_oargs;
78
- int nb_oargs = def->nb_oargs;
79
- for (i = 0; i < nb_oargs; i++) {
79
- for (i = 0; i < nb_oargs; i++) {
80
- reset_temp(op->args[i]);
80
- reset_temp(op->args[i]);
81
- /* Save the corresponding known-zero bits mask for the
81
- /* Save the corresponding known-zero bits mask for the
82
- first output argument (only one supported so far). */
82
- first output argument (only one supported so far). */
83
- if (i == 0) {
83
- if (i == 0) {
84
- arg_info(op->args[i])->z_mask = z_mask;
84
- arg_info(op->args[i])->z_mask = z_mask;
85
- }
85
- }
86
- }
86
- }
87
- }
87
- }
88
+ finish_folding(&ctx, op);
88
+ finish_folding(&ctx, op);
89
89
90
/* Eliminate duplicate and redundant fence instructions. */
90
/* Eliminate duplicate and redundant fence instructions. */
91
if (ctx.prev_mb) {
91
if (ctx.prev_mb) {
92
--
92
--
93
2.25.1
93
2.25.1
94
94
95
95
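Reassembled from the hunk above, the new writeback helper reads as follows (comments added here for explanation):

    static void finish_folding(OptContext *ctx, TCGOp *op)
    {
        const TCGOpDef *def = &tcg_op_defs[op->opc];
        int i, nb_oargs;

        /* An op that ends the basic block invalidates all tracked state;
           no optimization information is carried across BBs. */
        if (def->flags & TCG_OPF_BB_END) {
            memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
            ctx->prev_mb = NULL;
            return;
        }

        nb_oargs = def->nb_oargs;
        for (i = 0; i < nb_oargs; i++) {
            reset_temp(op->args[i]);
            /* Only the first output receives the z_mask computed above;
               multi-output opcodes are not handled here yet. */
            if (i == 0) {
                arg_info(op->args[i])->z_mask = ctx->z_mask;
            }
        }
    }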
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
5
---
6
tcg/optimize.c | 9 ++++++---
6
tcg/optimize.c | 9 ++++++---
7
1 file changed, 6 insertions(+), 3 deletions(-)
7
1 file changed, 6 insertions(+), 3 deletions(-)
8
8
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
13
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
14
uint64_t z_mask, partmask, affected, tmp;
14
uint64_t z_mask, partmask, affected, tmp;
15
TCGOpcode opc = op->opc;
15
TCGOpcode opc = op->opc;
16
const TCGOpDef *def;
16
const TCGOpDef *def;
17
+ bool done = false;
17
+ bool done = false;
18
18
19
/* Calls are special. */
19
/* Calls are special. */
20
if (opc == INDEX_op_call) {
20
if (opc == INDEX_op_call) {
21
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
21
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
22
allocator where needed and possible. Also detect copies. */
22
allocator where needed and possible. Also detect copies. */
23
switch (opc) {
23
switch (opc) {
24
CASE_OP_32_64_VEC(mov):
24
CASE_OP_32_64_VEC(mov):
25
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
25
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
26
- continue;
26
- continue;
27
+ done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
27
+ done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
28
+ break;
28
+ break;
29
29
30
case INDEX_op_dup_vec:
30
case INDEX_op_dup_vec:
31
if (arg_is_const(op->args[1])) {
31
if (arg_is_const(op->args[1])) {
32
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
32
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
33
break;
33
break;
34
}
34
}
35
35
36
- finish_folding(&ctx, op);
36
- finish_folding(&ctx, op);
37
+ if (!done) {
37
+ if (!done) {
38
+ finish_folding(&ctx, op);
38
+ finish_folding(&ctx, op);
39
+ }
39
+ }
40
40
41
/* Eliminate duplicate and redundant fence instructions. */
41
/* Eliminate duplicate and redundant fence instructions. */
42
if (ctx.prev_mb) {
42
if (ctx.prev_mb) {
43
--
43
--
44
2.25.1
44
2.25.1
45
45
46
46
1
This puts the separate mb optimization into the same framework
1
This puts the separate mb optimization into the same framework
2
as the others. While fold_qemu_{ld,st} are currently identical,
2
as the others. While fold_qemu_{ld,st} are currently identical,
3
that won't last as more code gets moved.
3
that won't last as more code gets moved.
4
4
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
tcg/optimize.c | 89 +++++++++++++++++++++++++++++---------------------
9
tcg/optimize.c | 89 +++++++++++++++++++++++++++++---------------------
10
1 file changed, 51 insertions(+), 38 deletions(-)
10
1 file changed, 51 insertions(+), 38 deletions(-)
11
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
17
return true;
17
return true;
18
}
18
}
19
19
20
+static bool fold_mb(OptContext *ctx, TCGOp *op)
20
+static bool fold_mb(OptContext *ctx, TCGOp *op)
21
+{
21
+{
22
+ /* Eliminate duplicate and redundant fence instructions. */
22
+ /* Eliminate duplicate and redundant fence instructions. */
23
+ if (ctx->prev_mb) {
23
+ if (ctx->prev_mb) {
24
+ /*
24
+ /*
25
+ * Merge two barriers of the same type into one,
25
+ * Merge two barriers of the same type into one,
26
+ * or a weaker barrier into a stronger one,
26
+ * or a weaker barrier into a stronger one,
27
+ * or two weaker barriers into a stronger one.
27
+ * or two weaker barriers into a stronger one.
28
+ * mb X; mb Y => mb X|Y
28
+ * mb X; mb Y => mb X|Y
29
+ * mb; strl => mb; st
29
+ * mb; strl => mb; st
30
+ * ldaq; mb => ld; mb
30
+ * ldaq; mb => ld; mb
31
+ * ldaq; strl => ld; mb; st
31
+ * ldaq; strl => ld; mb; st
32
+ * Other combinations are also merged into a strong
32
+ * Other combinations are also merged into a strong
33
+ * barrier. This is stricter than specified but for
33
+ * barrier. This is stricter than specified but for
34
+ * the purposes of TCG is better than not optimizing.
34
+ * the purposes of TCG is better than not optimizing.
35
+ */
35
+ */
36
+ ctx->prev_mb->args[0] |= op->args[0];
36
+ ctx->prev_mb->args[0] |= op->args[0];
37
+ tcg_op_remove(ctx->tcg, op);
37
+ tcg_op_remove(ctx->tcg, op);
38
+ } else {
38
+ } else {
39
+ ctx->prev_mb = op;
39
+ ctx->prev_mb = op;
40
+ }
40
+ }
41
+ return true;
41
+ return true;
42
+}
42
+}
43
+
43
+
44
+static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
44
+static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
45
+{
45
+{
46
+ /* Opcodes that touch guest memory stop the mb optimization. */
46
+ /* Opcodes that touch guest memory stop the mb optimization. */
47
+ ctx->prev_mb = NULL;
47
+ ctx->prev_mb = NULL;
48
+ return false;
48
+ return false;
49
+}
49
+}
50
+
50
+
51
+static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
51
+static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
52
+{
52
+{
53
+ /* Opcodes that touch guest memory stop the mb optimization. */
53
+ /* Opcodes that touch guest memory stop the mb optimization. */
54
+ ctx->prev_mb = NULL;
54
+ ctx->prev_mb = NULL;
55
+ return false;
55
+ return false;
56
+}
56
+}
57
+
57
+
58
/* Propagate constants and copies, fold constant expressions. */
58
/* Propagate constants and copies, fold constant expressions. */
59
void tcg_optimize(TCGContext *s)
59
void tcg_optimize(TCGContext *s)
60
{
60
{
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
}
62
}
63
break;
63
break;
64
64
65
+ case INDEX_op_mb:
65
+ case INDEX_op_mb:
66
+ done = fold_mb(&ctx, op);
66
+ done = fold_mb(&ctx, op);
67
+ break;
67
+ break;
68
+ case INDEX_op_qemu_ld_i32:
68
+ case INDEX_op_qemu_ld_i32:
69
+ case INDEX_op_qemu_ld_i64:
69
+ case INDEX_op_qemu_ld_i64:
70
+ done = fold_qemu_ld(&ctx, op);
70
+ done = fold_qemu_ld(&ctx, op);
71
+ break;
71
+ break;
72
+ case INDEX_op_qemu_st_i32:
72
+ case INDEX_op_qemu_st_i32:
73
+ case INDEX_op_qemu_st8_i32:
73
+ case INDEX_op_qemu_st8_i32:
74
+ case INDEX_op_qemu_st_i64:
74
+ case INDEX_op_qemu_st_i64:
75
+ done = fold_qemu_st(&ctx, op);
75
+ done = fold_qemu_st(&ctx, op);
76
+ break;
76
+ break;
77
+
77
+
78
default:
78
default:
79
break;
79
break;
80
}
80
}
81
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
81
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
82
if (!done) {
82
if (!done) {
83
finish_folding(&ctx, op);
83
finish_folding(&ctx, op);
84
}
84
}
85
-
85
-
86
- /* Eliminate duplicate and redundant fence instructions. */
86
- /* Eliminate duplicate and redundant fence instructions. */
87
- if (ctx.prev_mb) {
87
- if (ctx.prev_mb) {
88
- switch (opc) {
88
- switch (opc) {
89
- case INDEX_op_mb:
89
- case INDEX_op_mb:
90
- /* Merge two barriers of the same type into one,
90
- /* Merge two barriers of the same type into one,
91
- * or a weaker barrier into a stronger one,
91
- * or a weaker barrier into a stronger one,
92
- * or two weaker barriers into a stronger one.
92
- * or two weaker barriers into a stronger one.
93
- * mb X; mb Y => mb X|Y
93
- * mb X; mb Y => mb X|Y
94
- * mb; strl => mb; st
94
- * mb; strl => mb; st
95
- * ldaq; mb => ld; mb
95
- * ldaq; mb => ld; mb
96
- * ldaq; strl => ld; mb; st
96
- * ldaq; strl => ld; mb; st
97
- * Other combinations are also merged into a strong
97
- * Other combinations are also merged into a strong
98
- * barrier. This is stricter than specified but for
98
- * barrier. This is stricter than specified but for
99
- * the purposes of TCG is better than not optimizing.
99
- * the purposes of TCG is better than not optimizing.
100
- */
100
- */
101
- ctx.prev_mb->args[0] |= op->args[0];
101
- ctx.prev_mb->args[0] |= op->args[0];
102
- tcg_op_remove(s, op);
102
- tcg_op_remove(s, op);
103
- break;
103
- break;
104
-
104
-
105
- default:
105
- default:
106
- /* Opcodes that end the block stop the optimization. */
106
- /* Opcodes that end the block stop the optimization. */
107
- if ((def->flags & TCG_OPF_BB_END) == 0) {
107
- if ((def->flags & TCG_OPF_BB_END) == 0) {
108
- break;
108
- break;
109
- }
109
- }
110
- /* fallthru */
110
- /* fallthru */
111
- case INDEX_op_qemu_ld_i32:
111
- case INDEX_op_qemu_ld_i32:
112
- case INDEX_op_qemu_ld_i64:
112
- case INDEX_op_qemu_ld_i64:
113
- case INDEX_op_qemu_st_i32:
113
- case INDEX_op_qemu_st_i32:
114
- case INDEX_op_qemu_st8_i32:
114
- case INDEX_op_qemu_st8_i32:
115
- case INDEX_op_qemu_st_i64:
115
- case INDEX_op_qemu_st_i64:
116
- /* Opcodes that touch guest memory stop the optimization. */
116
- /* Opcodes that touch guest memory stop the optimization. */
117
- ctx.prev_mb = NULL;
117
- ctx.prev_mb = NULL;
118
- break;
118
- break;
119
- }
119
- }
120
- } else if (opc == INDEX_op_mb) {
120
- } else if (opc == INDEX_op_mb) {
121
- ctx.prev_mb = op;
121
- ctx.prev_mb = op;
122
- }
122
- }
123
}
123
}
124
}
124
}
125
--
125
--
126
2.25.1
126
2.25.1
127
127
128
128
1
Split out a whole bunch of placeholder functions, which are
1
Split out a whole bunch of placeholder functions, which are
2
currently identical. That won't last as more code gets moved.
2
currently identical. That won't last as more code gets moved.
3
3
4
Use CASE_32_64_VEC for some logical operators that previously
4
Use CASE_32_64_VEC for some logical operators that previously
5
missed the addition of vectors.
5
missed the addition of vectors.
6
6
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
8
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
10
---
11
tcg/optimize.c | 271 +++++++++++++++++++++++++++++++++++++++----------
11
tcg/optimize.c | 271 +++++++++++++++++++++++++++++++++++++++----------
12
1 file changed, 219 insertions(+), 52 deletions(-)
12
1 file changed, 219 insertions(+), 52 deletions(-)
13
13
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
16
--- a/tcg/optimize.c
17
+++ b/tcg/optimize.c
17
+++ b/tcg/optimize.c
18
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
18
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
19
}
19
}
20
}
20
}
21
21
22
+/*
22
+/*
23
+ * The fold_* functions return true when processing is complete,
23
+ * The fold_* functions return true when processing is complete,
24
+ * usually by folding the operation to a constant or to a copy,
24
+ * usually by folding the operation to a constant or to a copy,
25
+ * and calling tcg_opt_gen_{mov,movi}. They may do other things,
25
+ * and calling tcg_opt_gen_{mov,movi}. They may do other things,
26
+ * like collect information about the value produced, for use in
26
+ * like collect information about the value produced, for use in
27
+ * optimizing a subsequent operation.
27
+ * optimizing a subsequent operation.
28
+ *
28
+ *
29
+ * These first fold_* functions are all helpers, used by other
29
+ * These first fold_* functions are all helpers, used by other
30
+ * folders for more specific operations.
30
+ * folders for more specific operations.
31
+ */
31
+ */
32
+
32
+
33
+static bool fold_const1(OptContext *ctx, TCGOp *op)
33
+static bool fold_const1(OptContext *ctx, TCGOp *op)
34
+{
34
+{
35
+ if (arg_is_const(op->args[1])) {
35
+ if (arg_is_const(op->args[1])) {
36
+ uint64_t t;
36
+ uint64_t t;
37
+
37
+
38
+ t = arg_info(op->args[1])->val;
38
+ t = arg_info(op->args[1])->val;
39
+ t = do_constant_folding(op->opc, t, 0);
39
+ t = do_constant_folding(op->opc, t, 0);
40
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
40
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
41
+ }
41
+ }
42
+ return false;
42
+ return false;
43
+}
43
+}
44
+
44
+
45
+static bool fold_const2(OptContext *ctx, TCGOp *op)
45
+static bool fold_const2(OptContext *ctx, TCGOp *op)
46
+{
46
+{
47
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
47
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
48
+ uint64_t t1 = arg_info(op->args[1])->val;
48
+ uint64_t t1 = arg_info(op->args[1])->val;
49
+ uint64_t t2 = arg_info(op->args[2])->val;
49
+ uint64_t t2 = arg_info(op->args[2])->val;
50
+
50
+
51
+ t1 = do_constant_folding(op->opc, t1, t2);
51
+ t1 = do_constant_folding(op->opc, t1, t2);
52
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
52
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
53
+ }
53
+ }
54
+ return false;
54
+ return false;
55
+}
55
+}
56
+
56
+
57
+/*
57
+/*
58
+ * These outermost fold_<op> functions are sorted alphabetically.
58
+ * These outermost fold_<op> functions are sorted alphabetically.
59
+ */
59
+ */
60
+
60
+
61
+static bool fold_add(OptContext *ctx, TCGOp *op)
61
+static bool fold_add(OptContext *ctx, TCGOp *op)
62
+{
62
+{
63
+ return fold_const2(ctx, op);
63
+ return fold_const2(ctx, op);
64
+}
64
+}
65
+
65
+
66
+static bool fold_and(OptContext *ctx, TCGOp *op)
66
+static bool fold_and(OptContext *ctx, TCGOp *op)
67
+{
67
+{
68
+ return fold_const2(ctx, op);
68
+ return fold_const2(ctx, op);
69
+}
69
+}
70
+
70
+
71
+static bool fold_andc(OptContext *ctx, TCGOp *op)
71
+static bool fold_andc(OptContext *ctx, TCGOp *op)
72
+{
72
+{
73
+ return fold_const2(ctx, op);
73
+ return fold_const2(ctx, op);
74
+}
74
+}
75
+
75
+
76
static bool fold_call(OptContext *ctx, TCGOp *op)
76
static bool fold_call(OptContext *ctx, TCGOp *op)
77
{
77
{
78
TCGContext *s = ctx->tcg;
78
TCGContext *s = ctx->tcg;
79
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
79
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
80
return true;
80
return true;
81
}
81
}
82
82
83
+static bool fold_ctpop(OptContext *ctx, TCGOp *op)
83
+static bool fold_ctpop(OptContext *ctx, TCGOp *op)
84
+{
84
+{
85
+ return fold_const1(ctx, op);
85
+ return fold_const1(ctx, op);
86
+}
86
+}
87
+
87
+
88
+static bool fold_divide(OptContext *ctx, TCGOp *op)
88
+static bool fold_divide(OptContext *ctx, TCGOp *op)
89
+{
89
+{
90
+ return fold_const2(ctx, op);
90
+ return fold_const2(ctx, op);
91
+}
91
+}
92
+
92
+
93
+static bool fold_eqv(OptContext *ctx, TCGOp *op)
93
+static bool fold_eqv(OptContext *ctx, TCGOp *op)
94
+{
94
+{
95
+ return fold_const2(ctx, op);
95
+ return fold_const2(ctx, op);
96
+}
96
+}
97
+
97
+
98
+static bool fold_exts(OptContext *ctx, TCGOp *op)
98
+static bool fold_exts(OptContext *ctx, TCGOp *op)
99
+{
99
+{
100
+ return fold_const1(ctx, op);
100
+ return fold_const1(ctx, op);
101
+}
101
+}
102
+
102
+
103
+static bool fold_extu(OptContext *ctx, TCGOp *op)
103
+static bool fold_extu(OptContext *ctx, TCGOp *op)
104
+{
104
+{
105
+ return fold_const1(ctx, op);
105
+ return fold_const1(ctx, op);
106
+}
106
+}
107
+
107
+
108
static bool fold_mb(OptContext *ctx, TCGOp *op)
108
static bool fold_mb(OptContext *ctx, TCGOp *op)
109
{
109
{
110
/* Eliminate duplicate and redundant fence instructions. */
110
/* Eliminate duplicate and redundant fence instructions. */
111
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
111
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
112
return true;
112
return true;
113
}
113
}
114
114
115
+static bool fold_mul(OptContext *ctx, TCGOp *op)
115
+static bool fold_mul(OptContext *ctx, TCGOp *op)
116
+{
116
+{
117
+ return fold_const2(ctx, op);
117
+ return fold_const2(ctx, op);
118
+}
118
+}
119
+
119
+
120
+static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
120
+static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
121
+{
121
+{
122
+ return fold_const2(ctx, op);
122
+ return fold_const2(ctx, op);
123
+}
123
+}
124
+
124
+
125
+static bool fold_nand(OptContext *ctx, TCGOp *op)
125
+static bool fold_nand(OptContext *ctx, TCGOp *op)
126
+{
126
+{
127
+ return fold_const2(ctx, op);
127
+ return fold_const2(ctx, op);
128
+}
128
+}
129
+
129
+
130
+static bool fold_neg(OptContext *ctx, TCGOp *op)
130
+static bool fold_neg(OptContext *ctx, TCGOp *op)
131
+{
131
+{
132
+ return fold_const1(ctx, op);
132
+ return fold_const1(ctx, op);
133
+}
133
+}
134
+
134
+
135
+static bool fold_nor(OptContext *ctx, TCGOp *op)
135
+static bool fold_nor(OptContext *ctx, TCGOp *op)
136
+{
136
+{
137
+ return fold_const2(ctx, op);
137
+ return fold_const2(ctx, op);
138
+}
138
+}
139
+
139
+
140
+static bool fold_not(OptContext *ctx, TCGOp *op)
140
+static bool fold_not(OptContext *ctx, TCGOp *op)
141
+{
141
+{
142
+ return fold_const1(ctx, op);
142
+ return fold_const1(ctx, op);
143
+}
143
+}
144
+
144
+
145
+static bool fold_or(OptContext *ctx, TCGOp *op)
145
+static bool fold_or(OptContext *ctx, TCGOp *op)
146
+{
146
+{
147
+ return fold_const2(ctx, op);
147
+ return fold_const2(ctx, op);
148
+}
148
+}
149
+
149
+
150
+static bool fold_orc(OptContext *ctx, TCGOp *op)
150
+static bool fold_orc(OptContext *ctx, TCGOp *op)
151
+{
151
+{
152
+ return fold_const2(ctx, op);
152
+ return fold_const2(ctx, op);
153
+}
153
+}
154
+
154
+
155
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
155
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
156
{
156
{
157
/* Opcodes that touch guest memory stop the mb optimization. */
157
/* Opcodes that touch guest memory stop the mb optimization. */
158
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
158
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
159
return false;
159
return false;
160
}
160
}
161
161
162
+static bool fold_remainder(OptContext *ctx, TCGOp *op)
162
+static bool fold_remainder(OptContext *ctx, TCGOp *op)
163
+{
163
+{
164
+ return fold_const2(ctx, op);
164
+ return fold_const2(ctx, op);
165
+}
165
+}
166
+
166
+
167
+static bool fold_shift(OptContext *ctx, TCGOp *op)
167
+static bool fold_shift(OptContext *ctx, TCGOp *op)
168
+{
168
+{
169
+ return fold_const2(ctx, op);
169
+ return fold_const2(ctx, op);
170
+}
170
+}
171
+
171
+
172
+static bool fold_sub(OptContext *ctx, TCGOp *op)
172
+static bool fold_sub(OptContext *ctx, TCGOp *op)
173
+{
173
+{
174
+ return fold_const2(ctx, op);
174
+ return fold_const2(ctx, op);
175
+}
175
+}
176
+
176
+
177
+static bool fold_xor(OptContext *ctx, TCGOp *op)
177
+static bool fold_xor(OptContext *ctx, TCGOp *op)
178
+{
178
+{
179
+ return fold_const2(ctx, op);
179
+ return fold_const2(ctx, op);
180
+}
180
+}
181
+
181
+
182
/* Propagate constants and copies, fold constant expressions. */
182
/* Propagate constants and copies, fold constant expressions. */
183
void tcg_optimize(TCGContext *s)
183
void tcg_optimize(TCGContext *s)
184
{
184
{
185
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
185
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
186
}
186
}
187
break;
187
break;
188
188
189
- CASE_OP_32_64(not):
189
- CASE_OP_32_64(not):
190
- CASE_OP_32_64(neg):
190
- CASE_OP_32_64(neg):
191
- CASE_OP_32_64(ext8s):
191
- CASE_OP_32_64(ext8s):
192
- CASE_OP_32_64(ext8u):
192
- CASE_OP_32_64(ext8u):
193
- CASE_OP_32_64(ext16s):
193
- CASE_OP_32_64(ext16s):
194
- CASE_OP_32_64(ext16u):
194
- CASE_OP_32_64(ext16u):
195
- CASE_OP_32_64(ctpop):
195
- CASE_OP_32_64(ctpop):
196
- case INDEX_op_ext32s_i64:
196
- case INDEX_op_ext32s_i64:
197
- case INDEX_op_ext32u_i64:
197
- case INDEX_op_ext32u_i64:
198
- case INDEX_op_ext_i32_i64:
198
- case INDEX_op_ext_i32_i64:
199
- case INDEX_op_extu_i32_i64:
199
- case INDEX_op_extu_i32_i64:
200
- case INDEX_op_extrl_i64_i32:
200
- case INDEX_op_extrl_i64_i32:
201
- case INDEX_op_extrh_i64_i32:
201
- case INDEX_op_extrh_i64_i32:
202
- if (arg_is_const(op->args[1])) {
202
- if (arg_is_const(op->args[1])) {
203
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
203
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
204
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
204
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
205
- continue;
205
- continue;
206
- }
206
- }
207
- break;
207
- break;
208
-
208
-
209
CASE_OP_32_64(bswap16):
209
CASE_OP_32_64(bswap16):
210
CASE_OP_32_64(bswap32):
210
CASE_OP_32_64(bswap32):
211
case INDEX_op_bswap64_i64:
211
case INDEX_op_bswap64_i64:
212
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
212
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
213
}
213
}
214
break;
214
break;
215
215
216
- CASE_OP_32_64(add):
216
- CASE_OP_32_64(add):
217
- CASE_OP_32_64(sub):
217
- CASE_OP_32_64(sub):
218
- CASE_OP_32_64(mul):
218
- CASE_OP_32_64(mul):
219
- CASE_OP_32_64(or):
219
- CASE_OP_32_64(or):
220
- CASE_OP_32_64(and):
220
- CASE_OP_32_64(and):
221
- CASE_OP_32_64(xor):
221
- CASE_OP_32_64(xor):
222
- CASE_OP_32_64(shl):
222
- CASE_OP_32_64(shl):
223
- CASE_OP_32_64(shr):
223
- CASE_OP_32_64(shr):
224
- CASE_OP_32_64(sar):
224
- CASE_OP_32_64(sar):
225
- CASE_OP_32_64(rotl):
225
- CASE_OP_32_64(rotl):
226
- CASE_OP_32_64(rotr):
226
- CASE_OP_32_64(rotr):
227
- CASE_OP_32_64(andc):
227
- CASE_OP_32_64(andc):
228
- CASE_OP_32_64(orc):
228
- CASE_OP_32_64(orc):
229
- CASE_OP_32_64(eqv):
229
- CASE_OP_32_64(eqv):
230
- CASE_OP_32_64(nand):
230
- CASE_OP_32_64(nand):
231
- CASE_OP_32_64(nor):
231
- CASE_OP_32_64(nor):
232
- CASE_OP_32_64(muluh):
232
- CASE_OP_32_64(muluh):
233
- CASE_OP_32_64(mulsh):
233
- CASE_OP_32_64(mulsh):
234
- CASE_OP_32_64(div):
234
- CASE_OP_32_64(div):
235
- CASE_OP_32_64(divu):
235
- CASE_OP_32_64(divu):
236
- CASE_OP_32_64(rem):
236
- CASE_OP_32_64(rem):
237
- CASE_OP_32_64(remu):
237
- CASE_OP_32_64(remu):
238
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
238
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
239
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
239
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
240
- arg_info(op->args[2])->val);
240
- arg_info(op->args[2])->val);
241
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
241
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
242
- continue;
242
- continue;
243
- }
243
- }
244
- break;
244
- break;
245
-
245
-
246
CASE_OP_32_64(clz):
246
CASE_OP_32_64(clz):
247
CASE_OP_32_64(ctz):
247
CASE_OP_32_64(ctz):
248
if (arg_is_const(op->args[1])) {
248
if (arg_is_const(op->args[1])) {
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
250
}
250
}
251
break;
251
break;
252
252
253
+ default:
253
+ default:
254
+ break;
254
+ break;
255
+
255
+
256
+ /* ---------------------------------------------------------- */
256
+ /* ---------------------------------------------------------- */
257
+ /* Sorted alphabetically by opcode as much as possible. */
257
+ /* Sorted alphabetically by opcode as much as possible. */
258
+
258
+
259
+ CASE_OP_32_64_VEC(add):
259
+ CASE_OP_32_64_VEC(add):
260
+ done = fold_add(&ctx, op);
260
+ done = fold_add(&ctx, op);
261
+ break;
261
+ break;
262
+ CASE_OP_32_64_VEC(and):
262
+ CASE_OP_32_64_VEC(and):
263
+ done = fold_and(&ctx, op);
263
+ done = fold_and(&ctx, op);
264
+ break;
264
+ break;
265
+ CASE_OP_32_64_VEC(andc):
265
+ CASE_OP_32_64_VEC(andc):
266
+ done = fold_andc(&ctx, op);
266
+ done = fold_andc(&ctx, op);
267
+ break;
267
+ break;
268
+ CASE_OP_32_64(ctpop):
268
+ CASE_OP_32_64(ctpop):
269
+ done = fold_ctpop(&ctx, op);
269
+ done = fold_ctpop(&ctx, op);
270
+ break;
270
+ break;
271
+ CASE_OP_32_64(div):
271
+ CASE_OP_32_64(div):
272
+ CASE_OP_32_64(divu):
272
+ CASE_OP_32_64(divu):
273
+ done = fold_divide(&ctx, op);
273
+ done = fold_divide(&ctx, op);
274
+ break;
274
+ break;
275
+ CASE_OP_32_64(eqv):
275
+ CASE_OP_32_64(eqv):
276
+ done = fold_eqv(&ctx, op);
276
+ done = fold_eqv(&ctx, op);
277
+ break;
277
+ break;
278
+ CASE_OP_32_64(ext8s):
278
+ CASE_OP_32_64(ext8s):
279
+ CASE_OP_32_64(ext16s):
279
+ CASE_OP_32_64(ext16s):
280
+ case INDEX_op_ext32s_i64:
280
+ case INDEX_op_ext32s_i64:
281
+ case INDEX_op_ext_i32_i64:
281
+ case INDEX_op_ext_i32_i64:
282
+ done = fold_exts(&ctx, op);
282
+ done = fold_exts(&ctx, op);
283
+ break;
283
+ break;
284
+ CASE_OP_32_64(ext8u):
284
+ CASE_OP_32_64(ext8u):
285
+ CASE_OP_32_64(ext16u):
285
+ CASE_OP_32_64(ext16u):
286
+ case INDEX_op_ext32u_i64:
286
+ case INDEX_op_ext32u_i64:
287
+ case INDEX_op_extu_i32_i64:
287
+ case INDEX_op_extu_i32_i64:
288
+ case INDEX_op_extrl_i64_i32:
288
+ case INDEX_op_extrl_i64_i32:
289
+ case INDEX_op_extrh_i64_i32:
289
+ case INDEX_op_extrh_i64_i32:
290
+ done = fold_extu(&ctx, op);
290
+ done = fold_extu(&ctx, op);
291
+ break;
291
+ break;
292
case INDEX_op_mb:
292
case INDEX_op_mb:
293
done = fold_mb(&ctx, op);
293
done = fold_mb(&ctx, op);
294
break;
294
break;
295
+ CASE_OP_32_64(mul):
295
+ CASE_OP_32_64(mul):
296
+ done = fold_mul(&ctx, op);
296
+ done = fold_mul(&ctx, op);
297
+ break;
297
+ break;
298
+ CASE_OP_32_64(mulsh):
298
+ CASE_OP_32_64(mulsh):
299
+ CASE_OP_32_64(muluh):
299
+ CASE_OP_32_64(muluh):
300
+ done = fold_mul_highpart(&ctx, op);
300
+ done = fold_mul_highpart(&ctx, op);
301
+ break;
301
+ break;
302
+ CASE_OP_32_64(nand):
302
+ CASE_OP_32_64(nand):
303
+ done = fold_nand(&ctx, op);
303
+ done = fold_nand(&ctx, op);
304
+ break;
304
+ break;
305
+ CASE_OP_32_64(neg):
305
+ CASE_OP_32_64(neg):
306
+ done = fold_neg(&ctx, op);
306
+ done = fold_neg(&ctx, op);
307
+ break;
307
+ break;
308
+ CASE_OP_32_64(nor):
308
+ CASE_OP_32_64(nor):
309
+ done = fold_nor(&ctx, op);
309
+ done = fold_nor(&ctx, op);
310
+ break;
310
+ break;
311
+ CASE_OP_32_64_VEC(not):
311
+ CASE_OP_32_64_VEC(not):
312
+ done = fold_not(&ctx, op);
312
+ done = fold_not(&ctx, op);
313
+ break;
313
+ break;
314
+ CASE_OP_32_64_VEC(or):
314
+ CASE_OP_32_64_VEC(or):
315
+ done = fold_or(&ctx, op);
315
+ done = fold_or(&ctx, op);
316
+ break;
316
+ break;
317
+ CASE_OP_32_64_VEC(orc):
317
+ CASE_OP_32_64_VEC(orc):
318
+ done = fold_orc(&ctx, op);
318
+ done = fold_orc(&ctx, op);
319
+ break;
319
+ break;
320
case INDEX_op_qemu_ld_i32:
320
case INDEX_op_qemu_ld_i32:
321
case INDEX_op_qemu_ld_i64:
321
case INDEX_op_qemu_ld_i64:
322
done = fold_qemu_ld(&ctx, op);
322
done = fold_qemu_ld(&ctx, op);
323
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
323
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
324
case INDEX_op_qemu_st_i64:
324
case INDEX_op_qemu_st_i64:
325
done = fold_qemu_st(&ctx, op);
325
done = fold_qemu_st(&ctx, op);
326
break;
326
break;
327
-
327
-
328
- default:
328
- default:
329
+ CASE_OP_32_64(rem):
329
+ CASE_OP_32_64(rem):
330
+ CASE_OP_32_64(remu):
330
+ CASE_OP_32_64(remu):
331
+ done = fold_remainder(&ctx, op);
331
+ done = fold_remainder(&ctx, op);
332
+ break;
332
+ break;
333
+ CASE_OP_32_64(rotl):
333
+ CASE_OP_32_64(rotl):
334
+ CASE_OP_32_64(rotr):
334
+ CASE_OP_32_64(rotr):
335
+ CASE_OP_32_64(sar):
335
+ CASE_OP_32_64(sar):
336
+ CASE_OP_32_64(shl):
336
+ CASE_OP_32_64(shl):
337
+ CASE_OP_32_64(shr):
337
+ CASE_OP_32_64(shr):
338
+ done = fold_shift(&ctx, op);
338
+ done = fold_shift(&ctx, op);
339
+ break;
339
+ break;
340
+ CASE_OP_32_64_VEC(sub):
340
+ CASE_OP_32_64_VEC(sub):
341
+ done = fold_sub(&ctx, op);
341
+ done = fold_sub(&ctx, op);
342
+ break;
342
+ break;
343
+ CASE_OP_32_64_VEC(xor):
343
+ CASE_OP_32_64_VEC(xor):
344
+ done = fold_xor(&ctx, op);
344
+ done = fold_xor(&ctx, op);
345
break;
345
break;
346
}
346
}
347
347
348
--
348
--
349
2.25.1
349
2.25.1
350
350
351
351
Reduce some code duplication by folding the NE and EQ cases.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 145 ++++++++++++++++++++++++-------------------------
1 file changed, 72 insertions(+), 73 deletions(-)

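One note on the control flow added below, since the "i ^ inv" switch is easy to
misread: as used here, do_constant_folding_cond() returns 1 or 0 when a comparison
folds to a constant and a negative value when it does not, and inv is 1 only for
TCG_COND_NE. That is what lets the EQ and NE paths share one switch. A rough
summary (an illustration only, not code from this patch):

    /*
     * i = fold of the comparison on one 32-bit half, inv = (cond == NE)
     *
     *   cond  inv  i               i ^ inv  action taken
     *   EQ    0    0 (halves !=)   0        whole setcond2 folds to 0
     *   EQ    0    1 (halves ==)   1        compare the other half only
     *   NE    1    0 (halves ==)   1        compare the other half only
     *   NE    1    1 (halves !=)   0        whole setcond2 folds to 1
     */
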
diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
15
return fold_const2(ctx, op);
15
return fold_const2(ctx, op);
16
}
16
}
17
17
18
+static bool fold_setcond2(OptContext *ctx, TCGOp *op)
18
+static bool fold_setcond2(OptContext *ctx, TCGOp *op)
19
+{
19
+{
20
+ TCGCond cond = op->args[5];
20
+ TCGCond cond = op->args[5];
21
+ int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
21
+ int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
22
+ int inv = 0;
22
+ int inv = 0;
23
+
23
+
24
+ if (i >= 0) {
24
+ if (i >= 0) {
25
+ goto do_setcond_const;
25
+ goto do_setcond_const;
26
+ }
26
+ }
27
+
27
+
28
+ switch (cond) {
28
+ switch (cond) {
29
+ case TCG_COND_LT:
29
+ case TCG_COND_LT:
30
+ case TCG_COND_GE:
30
+ case TCG_COND_GE:
31
+ /*
31
+ /*
32
+ * Simplify LT/GE comparisons vs zero to a single compare
32
+ * Simplify LT/GE comparisons vs zero to a single compare
33
+ * vs the high word of the input.
33
+ * vs the high word of the input.
34
+ */
34
+ */
35
+ if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
35
+ if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
36
+ arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
36
+ arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
37
+ goto do_setcond_high;
37
+ goto do_setcond_high;
38
+ }
38
+ }
39
+ break;
39
+ break;
40
+
40
+
41
+ case TCG_COND_NE:
41
+ case TCG_COND_NE:
42
+ inv = 1;
42
+ inv = 1;
43
+ QEMU_FALLTHROUGH;
43
+ QEMU_FALLTHROUGH;
44
+ case TCG_COND_EQ:
44
+ case TCG_COND_EQ:
45
+ /*
45
+ /*
46
+ * Simplify EQ/NE comparisons where one of the pairs
46
+ * Simplify EQ/NE comparisons where one of the pairs
47
+ * can be simplified.
47
+ * can be simplified.
48
+ */
48
+ */
49
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
49
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
50
+ op->args[3], cond);
50
+ op->args[3], cond);
51
+ switch (i ^ inv) {
51
+ switch (i ^ inv) {
52
+ case 0:
52
+ case 0:
53
+ goto do_setcond_const;
53
+ goto do_setcond_const;
54
+ case 1:
54
+ case 1:
55
+ goto do_setcond_high;
55
+ goto do_setcond_high;
56
+ }
56
+ }
57
+
57
+
58
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
58
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
59
+ op->args[4], cond);
59
+ op->args[4], cond);
60
+ switch (i ^ inv) {
60
+ switch (i ^ inv) {
61
+ case 0:
61
+ case 0:
62
+ goto do_setcond_const;
62
+ goto do_setcond_const;
63
+ case 1:
63
+ case 1:
64
+ op->args[2] = op->args[3];
64
+ op->args[2] = op->args[3];
65
+ op->args[3] = cond;
65
+ op->args[3] = cond;
66
+ op->opc = INDEX_op_setcond_i32;
66
+ op->opc = INDEX_op_setcond_i32;
67
+ break;
67
+ break;
68
+ }
68
+ }
69
+ break;
69
+ break;
70
+
70
+
71
+ default:
71
+ default:
72
+ break;
72
+ break;
73
+
73
+
74
+ do_setcond_high:
74
+ do_setcond_high:
75
+ op->args[1] = op->args[2];
75
+ op->args[1] = op->args[2];
76
+ op->args[2] = op->args[4];
76
+ op->args[2] = op->args[4];
77
+ op->args[3] = cond;
77
+ op->args[3] = cond;
78
+ op->opc = INDEX_op_setcond_i32;
78
+ op->opc = INDEX_op_setcond_i32;
79
+ break;
79
+ break;
80
+ }
80
+ }
81
+ return false;
81
+ return false;
82
+
82
+
83
+ do_setcond_const:
83
+ do_setcond_const:
84
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
84
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
85
+}
85
+}
86
+
86
+
87
static bool fold_shift(OptContext *ctx, TCGOp *op)
87
static bool fold_shift(OptContext *ctx, TCGOp *op)
88
{
88
{
89
return fold_const2(ctx, op);
89
return fold_const2(ctx, op);
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
91
}
91
}
92
break;
92
break;
93
93
94
- case INDEX_op_setcond2_i32:
94
- case INDEX_op_setcond2_i32:
95
- i = do_constant_folding_cond2(&op->args[1], &op->args[3],
95
- i = do_constant_folding_cond2(&op->args[1], &op->args[3],
96
- op->args[5]);
96
- op->args[5]);
97
- if (i >= 0) {
97
- if (i >= 0) {
98
- do_setcond_const:
98
- do_setcond_const:
99
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
99
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
100
- continue;
100
- continue;
101
- }
101
- }
102
- if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
102
- if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
103
- && arg_is_const(op->args[3])
103
- && arg_is_const(op->args[3])
104
- && arg_info(op->args[3])->val == 0
104
- && arg_info(op->args[3])->val == 0
105
- && arg_is_const(op->args[4])
105
- && arg_is_const(op->args[4])
106
- && arg_info(op->args[4])->val == 0) {
106
- && arg_info(op->args[4])->val == 0) {
107
- /* Simplify LT/GE comparisons vs zero to a single compare
107
- /* Simplify LT/GE comparisons vs zero to a single compare
108
- vs the high word of the input. */
108
- vs the high word of the input. */
109
- do_setcond_high:
109
- do_setcond_high:
110
- reset_temp(op->args[0]);
110
- reset_temp(op->args[0]);
111
- arg_info(op->args[0])->z_mask = 1;
111
- arg_info(op->args[0])->z_mask = 1;
112
- op->opc = INDEX_op_setcond_i32;
112
- op->opc = INDEX_op_setcond_i32;
113
- op->args[1] = op->args[2];
113
- op->args[1] = op->args[2];
114
- op->args[2] = op->args[4];
114
- op->args[2] = op->args[4];
115
- op->args[3] = op->args[5];
115
- op->args[3] = op->args[5];
116
- break;
116
- break;
117
- }
117
- }
118
- if (op->args[5] == TCG_COND_EQ) {
118
- if (op->args[5] == TCG_COND_EQ) {
119
- /* Simplify EQ comparisons where one of the pairs
119
- /* Simplify EQ comparisons where one of the pairs
120
- can be simplified. */
120
- can be simplified. */
121
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
121
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
122
- op->args[1], op->args[3],
122
- op->args[1], op->args[3],
123
- TCG_COND_EQ);
123
- TCG_COND_EQ);
124
- if (i == 0) {
124
- if (i == 0) {
125
- goto do_setcond_const;
125
- goto do_setcond_const;
126
- } else if (i > 0) {
126
- } else if (i > 0) {
127
- goto do_setcond_high;
127
- goto do_setcond_high;
128
- }
128
- }
129
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
129
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
130
- op->args[2], op->args[4],
130
- op->args[2], op->args[4],
131
- TCG_COND_EQ);
131
- TCG_COND_EQ);
132
- if (i == 0) {
132
- if (i == 0) {
133
- goto do_setcond_high;
133
- goto do_setcond_high;
134
- } else if (i < 0) {
134
- } else if (i < 0) {
135
- break;
135
- break;
136
- }
136
- }
137
- do_setcond_low:
137
- do_setcond_low:
138
- reset_temp(op->args[0]);
138
- reset_temp(op->args[0]);
139
- arg_info(op->args[0])->z_mask = 1;
139
- arg_info(op->args[0])->z_mask = 1;
140
- op->opc = INDEX_op_setcond_i32;
140
- op->opc = INDEX_op_setcond_i32;
141
- op->args[2] = op->args[3];
141
- op->args[2] = op->args[3];
142
- op->args[3] = op->args[5];
142
- op->args[3] = op->args[5];
143
- break;
143
- break;
144
- }
144
- }
145
- if (op->args[5] == TCG_COND_NE) {
145
- if (op->args[5] == TCG_COND_NE) {
146
- /* Simplify NE comparisons where one of the pairs
146
- /* Simplify NE comparisons where one of the pairs
147
- can be simplified. */
147
- can be simplified. */
148
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
148
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
149
- op->args[1], op->args[3],
149
- op->args[1], op->args[3],
150
- TCG_COND_NE);
150
- TCG_COND_NE);
151
- if (i == 0) {
151
- if (i == 0) {
152
- goto do_setcond_high;
152
- goto do_setcond_high;
153
- } else if (i > 0) {
153
- } else if (i > 0) {
154
- goto do_setcond_const;
154
- goto do_setcond_const;
155
- }
155
- }
156
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
156
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
157
- op->args[2], op->args[4],
157
- op->args[2], op->args[4],
158
- TCG_COND_NE);
158
- TCG_COND_NE);
159
- if (i == 0) {
159
- if (i == 0) {
160
- goto do_setcond_low;
160
- goto do_setcond_low;
161
- } else if (i > 0) {
161
- } else if (i > 0) {
162
- goto do_setcond_const;
162
- goto do_setcond_const;
163
- }
163
- }
164
- }
164
- }
165
- break;
165
- break;
166
-
166
-
167
default:
167
default:
168
break;
168
break;
169
169
170
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
170
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
171
CASE_OP_32_64(shr):
171
CASE_OP_32_64(shr):
172
done = fold_shift(&ctx, op);
172
done = fold_shift(&ctx, op);
173
break;
173
break;
174
+ case INDEX_op_setcond2_i32:
174
+ case INDEX_op_setcond2_i32:
175
+ done = fold_setcond2(&ctx, op);
175
+ done = fold_setcond2(&ctx, op);
176
+ break;
176
+ break;
177
CASE_OP_32_64_VEC(sub):
177
CASE_OP_32_64_VEC(sub):
178
done = fold_sub(&ctx, op);
178
done = fold_sub(&ctx, op);
179
break;
179
break;
180
--
180
--
181
2.25.1
181
2.25.1
182
182
183
183
Reduce some code duplication by folding the NE and EQ cases.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 159 +++++++++++++++++++++++++------------------------
1 file changed, 81 insertions(+), 78 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
14
return fold_const2(ctx, op);
14
return fold_const2(ctx, op);
15
}
15
}
16
16
17
+static bool fold_brcond2(OptContext *ctx, TCGOp *op)
17
+static bool fold_brcond2(OptContext *ctx, TCGOp *op)
18
+{
18
+{
19
+ TCGCond cond = op->args[4];
19
+ TCGCond cond = op->args[4];
20
+ int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
20
+ int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
21
+ TCGArg label = op->args[5];
21
+ TCGArg label = op->args[5];
22
+ int inv = 0;
22
+ int inv = 0;
23
+
23
+
24
+ if (i >= 0) {
24
+ if (i >= 0) {
25
+ goto do_brcond_const;
25
+ goto do_brcond_const;
26
+ }
26
+ }
27
+
27
+
28
+ switch (cond) {
28
+ switch (cond) {
29
+ case TCG_COND_LT:
29
+ case TCG_COND_LT:
30
+ case TCG_COND_GE:
30
+ case TCG_COND_GE:
31
+ /*
31
+ /*
32
+ * Simplify LT/GE comparisons vs zero to a single compare
32
+ * Simplify LT/GE comparisons vs zero to a single compare
33
+ * vs the high word of the input.
33
+ * vs the high word of the input.
34
+ */
34
+ */
35
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
35
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
36
+ arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
36
+ arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
37
+ goto do_brcond_high;
37
+ goto do_brcond_high;
38
+ }
38
+ }
39
+ break;
39
+ break;
40
+
40
+
41
+ case TCG_COND_NE:
41
+ case TCG_COND_NE:
42
+ inv = 1;
42
+ inv = 1;
43
+ QEMU_FALLTHROUGH;
43
+ QEMU_FALLTHROUGH;
44
+ case TCG_COND_EQ:
44
+ case TCG_COND_EQ:
45
+ /*
45
+ /*
46
+ * Simplify EQ/NE comparisons where one of the pairs
46
+ * Simplify EQ/NE comparisons where one of the pairs
47
+ * can be simplified.
47
+ * can be simplified.
48
+ */
48
+ */
49
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
49
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
50
+ op->args[2], cond);
50
+ op->args[2], cond);
51
+ switch (i ^ inv) {
51
+ switch (i ^ inv) {
52
+ case 0:
52
+ case 0:
53
+ goto do_brcond_const;
53
+ goto do_brcond_const;
54
+ case 1:
54
+ case 1:
55
+ goto do_brcond_high;
55
+ goto do_brcond_high;
56
+ }
56
+ }
57
+
57
+
58
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
58
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
59
+ op->args[3], cond);
59
+ op->args[3], cond);
60
+ switch (i ^ inv) {
60
+ switch (i ^ inv) {
61
+ case 0:
61
+ case 0:
62
+ goto do_brcond_const;
62
+ goto do_brcond_const;
63
+ case 1:
63
+ case 1:
64
+ op->opc = INDEX_op_brcond_i32;
64
+ op->opc = INDEX_op_brcond_i32;
65
+ op->args[1] = op->args[2];
65
+ op->args[1] = op->args[2];
66
+ op->args[2] = cond;
66
+ op->args[2] = cond;
67
+ op->args[3] = label;
67
+ op->args[3] = label;
68
+ break;
68
+ break;
69
+ }
69
+ }
70
+ break;
70
+ break;
71
+
71
+
72
+ default:
72
+ default:
73
+ break;
73
+ break;
74
+
74
+
75
+ do_brcond_high:
75
+ do_brcond_high:
76
+ op->opc = INDEX_op_brcond_i32;
76
+ op->opc = INDEX_op_brcond_i32;
77
+ op->args[0] = op->args[1];
77
+ op->args[0] = op->args[1];
78
+ op->args[1] = op->args[3];
78
+ op->args[1] = op->args[3];
79
+ op->args[2] = cond;
79
+ op->args[2] = cond;
80
+ op->args[3] = label;
80
+ op->args[3] = label;
81
+ break;
81
+ break;
82
+
82
+
83
+ do_brcond_const:
83
+ do_brcond_const:
84
+ if (i == 0) {
84
+ if (i == 0) {
85
+ tcg_op_remove(ctx->tcg, op);
85
+ tcg_op_remove(ctx->tcg, op);
86
+ return true;
86
+ return true;
87
+ }
87
+ }
88
+ op->opc = INDEX_op_br;
88
+ op->opc = INDEX_op_br;
89
+ op->args[0] = label;
89
+ op->args[0] = label;
90
+ break;
90
+ break;
91
+ }
91
+ }
92
+ return false;
92
+ return false;
93
+}
93
+}
94
+
94
+
95
static bool fold_call(OptContext *ctx, TCGOp *op)
95
static bool fold_call(OptContext *ctx, TCGOp *op)
96
{
96
{
97
TCGContext *s = ctx->tcg;
97
TCGContext *s = ctx->tcg;
98
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
98
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
99
}
99
}
100
break;
100
break;
101
101
102
- case INDEX_op_brcond2_i32:
102
- case INDEX_op_brcond2_i32:
103
- i = do_constant_folding_cond2(&op->args[0], &op->args[2],
103
- i = do_constant_folding_cond2(&op->args[0], &op->args[2],
104
- op->args[4]);
104
- op->args[4]);
105
- if (i == 0) {
105
- if (i == 0) {
106
- do_brcond_false:
106
- do_brcond_false:
107
- tcg_op_remove(s, op);
107
- tcg_op_remove(s, op);
108
- continue;
108
- continue;
109
- }
109
- }
110
- if (i > 0) {
110
- if (i > 0) {
111
- do_brcond_true:
111
- do_brcond_true:
112
- op->opc = opc = INDEX_op_br;
112
- op->opc = opc = INDEX_op_br;
113
- op->args[0] = op->args[5];
113
- op->args[0] = op->args[5];
114
- break;
114
- break;
115
- }
115
- }
116
- if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
116
- if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
117
- && arg_is_const(op->args[2])
117
- && arg_is_const(op->args[2])
118
- && arg_info(op->args[2])->val == 0
118
- && arg_info(op->args[2])->val == 0
119
- && arg_is_const(op->args[3])
119
- && arg_is_const(op->args[3])
120
- && arg_info(op->args[3])->val == 0) {
120
- && arg_info(op->args[3])->val == 0) {
121
- /* Simplify LT/GE comparisons vs zero to a single compare
121
- /* Simplify LT/GE comparisons vs zero to a single compare
122
- vs the high word of the input. */
122
- vs the high word of the input. */
123
- do_brcond_high:
123
- do_brcond_high:
124
- op->opc = opc = INDEX_op_brcond_i32;
124
- op->opc = opc = INDEX_op_brcond_i32;
125
- op->args[0] = op->args[1];
125
- op->args[0] = op->args[1];
126
- op->args[1] = op->args[3];
126
- op->args[1] = op->args[3];
127
- op->args[2] = op->args[4];
127
- op->args[2] = op->args[4];
128
- op->args[3] = op->args[5];
128
- op->args[3] = op->args[5];
129
- break;
129
- break;
130
- }
130
- }
131
- if (op->args[4] == TCG_COND_EQ) {
131
- if (op->args[4] == TCG_COND_EQ) {
132
- /* Simplify EQ comparisons where one of the pairs
132
- /* Simplify EQ comparisons where one of the pairs
133
- can be simplified. */
133
- can be simplified. */
134
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
134
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
135
- op->args[0], op->args[2],
135
- op->args[0], op->args[2],
136
- TCG_COND_EQ);
136
- TCG_COND_EQ);
137
- if (i == 0) {
137
- if (i == 0) {
138
- goto do_brcond_false;
138
- goto do_brcond_false;
139
- } else if (i > 0) {
139
- } else if (i > 0) {
140
- goto do_brcond_high;
140
- goto do_brcond_high;
141
- }
141
- }
142
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
142
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
143
- op->args[1], op->args[3],
143
- op->args[1], op->args[3],
144
- TCG_COND_EQ);
144
- TCG_COND_EQ);
145
- if (i == 0) {
145
- if (i == 0) {
146
- goto do_brcond_false;
146
- goto do_brcond_false;
147
- } else if (i < 0) {
147
- } else if (i < 0) {
148
- break;
148
- break;
149
- }
149
- }
150
- do_brcond_low:
150
- do_brcond_low:
151
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
151
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
152
- op->opc = INDEX_op_brcond_i32;
152
- op->opc = INDEX_op_brcond_i32;
153
- op->args[1] = op->args[2];
153
- op->args[1] = op->args[2];
154
- op->args[2] = op->args[4];
154
- op->args[2] = op->args[4];
155
- op->args[3] = op->args[5];
155
- op->args[3] = op->args[5];
156
- break;
156
- break;
157
- }
157
- }
158
- if (op->args[4] == TCG_COND_NE) {
158
- if (op->args[4] == TCG_COND_NE) {
159
- /* Simplify NE comparisons where one of the pairs
159
- /* Simplify NE comparisons where one of the pairs
160
- can be simplified. */
160
- can be simplified. */
161
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
161
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
162
- op->args[0], op->args[2],
162
- op->args[0], op->args[2],
163
- TCG_COND_NE);
163
- TCG_COND_NE);
164
- if (i == 0) {
164
- if (i == 0) {
165
- goto do_brcond_high;
165
- goto do_brcond_high;
166
- } else if (i > 0) {
166
- } else if (i > 0) {
167
- goto do_brcond_true;
167
- goto do_brcond_true;
168
- }
168
- }
169
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
169
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
170
- op->args[1], op->args[3],
170
- op->args[1], op->args[3],
171
- TCG_COND_NE);
171
- TCG_COND_NE);
172
- if (i == 0) {
172
- if (i == 0) {
173
- goto do_brcond_low;
173
- goto do_brcond_low;
174
- } else if (i > 0) {
174
- } else if (i > 0) {
175
- goto do_brcond_true;
175
- goto do_brcond_true;
176
- }
176
- }
177
- }
177
- }
178
- break;
178
- break;
179
-
179
-
180
default:
180
default:
181
break;
181
break;
182
182
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
CASE_OP_32_64_VEC(andc):
184
CASE_OP_32_64_VEC(andc):
185
done = fold_andc(&ctx, op);
185
done = fold_andc(&ctx, op);
186
break;
186
break;
187
+ case INDEX_op_brcond2_i32:
187
+ case INDEX_op_brcond2_i32:
188
+ done = fold_brcond2(&ctx, op);
188
+ done = fold_brcond2(&ctx, op);
189
+ break;
189
+ break;
190
CASE_OP_32_64(ctpop):
190
CASE_OP_32_64(ctpop):
191
done = fold_ctpop(&ctx, op);
191
done = fold_ctpop(&ctx, op);
192
break;
192
break;
193
--
193
--
194
2.25.1
194
2.25.1
195
195
196
196
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 33 +++++++++++++++++++--------------
1 file changed, 19 insertions(+), 14 deletions(-)

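As a quick illustration of the two outcomes in the helper added below (operand
values are made up for the example): when both inputs of a brcond are known
constants, do_constant_folding_cond() folds the condition to 0 or 1, otherwise
it reports that it cannot fold.

    /*
     * brcond_i32 5, 7, TCG_COND_EQ, L  -> condition folds to 0:
     *     the branch can never be taken, so the op is removed.
     * brcond_i32 5, 5, TCG_COND_EQ, L  -> condition folds to 1:
     *     the op is rewritten into an unconditional "br L".
     * Anything that cannot be folded is left alone (return false).
     */
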
diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
return fold_const2(ctx, op);
}

+static bool fold_brcond(OptContext *ctx, TCGOp *op)
+{
+ TCGCond cond = op->args[2];
+ int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
+
+ if (i == 0) {
+ tcg_op_remove(ctx->tcg, op);
+ return true;
+ }
+ if (i > 0) {
+ op->opc = INDEX_op_br;
+ op->args[0] = op->args[3];
+ }
+ return false;
+}
+
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
{
TCGCond cond = op->args[4];
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
}
break;

- CASE_OP_32_64(brcond):
- i = do_constant_folding_cond(opc, op->args[0],
- op->args[1], op->args[2]);
- if (i == 0) {
- tcg_op_remove(s, op);
- continue;
- } else if (i > 0) {
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
- op->opc = opc = INDEX_op_br;
- op->args[0] = op->args[3];
- break;
- }
- break;
-
CASE_OP_32_64(movcond):
i = do_constant_folding_cond(opc, op->args[1],
op->args[2], op->args[5]);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
CASE_OP_32_64_VEC(andc):
done = fold_andc(&ctx, op);
break;
+ CASE_OP_32_64(brcond):
+ done = fold_brcond(&ctx, op);
+ break;
case INDEX_op_brcond2_i32:
done = fold_brcond2(&ctx, op);
break;
--
2.25.1

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)

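Same idea as fold_brcond in the previous patch, but for setcond: when the
comparison folds, the op degenerates into a move of the constant result.
A tiny made-up example:

    /* setcond_i32 dst, 5, 7, TCG_COND_LTU  ->  folds to "dst = 1"  */
    /* setcond_i32 dst, 5, 5, TCG_COND_NE   ->  folds to "dst = 0"  */
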
diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
return fold_const2(ctx, op);
}

+static bool fold_setcond(OptContext *ctx, TCGOp *op)
+{
+ TCGCond cond = op->args[3];
+ int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
+
+ if (i >= 0) {
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
+ }
+ return false;
+}
+
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
{
TCGCond cond = op->args[5];
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
}
break;

- CASE_OP_32_64(setcond):
- i = do_constant_folding_cond(opc, op->args[1],
- op->args[2], op->args[3]);
- if (i >= 0) {
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
- continue;
- }
- break;
-
CASE_OP_32_64(movcond):
i = do_constant_folding_cond(opc, op->args[1],
op->args[2], op->args[5]);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
CASE_OP_32_64(shr):
done = fold_shift(&ctx, op);
break;
+ CASE_OP_32_64(setcond):
+ done = fold_setcond(&ctx, op);
+ break;
case INDEX_op_setcond2_i32:
done = fold_setcond2(&ctx, op);
break;
--
2.25.1

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 37 +++++++++++++++++++++----------------
1 file changed, 21 insertions(+), 16 deletions(-)

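For reference, the constant case below simply performs the 32x32->64 multiply at
translation time and splits the product back into the two 32-bit outputs; for
example (made-up operand values):

    /*
     * a = 0xffffffff, b = 0x00000002
     * r = (uint64_t)a * b = 0x00000001fffffffe
     * low  output: (int32_t)r         = 0xfffffffe
     * high output: (int32_t)(r >> 32) = 0x00000001
     * The original mulu2 op becomes the movi of the low part, and the
     * mov op inserted before it ends up holding the high part.
     */
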
diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
return fold_const2(ctx, op);
}

+static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
+{
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
+ uint32_t a = arg_info(op->args[2])->val;
+ uint32_t b = arg_info(op->args[3])->val;
+ uint64_t r = (uint64_t)a * b;
+ TCGArg rl, rh;
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
+
+ rl = op->args[0];
+ rh = op->args[1];
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
+ return true;
+ }
+ return false;
+}
+
static bool fold_nand(OptContext *ctx, TCGOp *op)
{
return fold_const2(ctx, op);
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
}
break;

- case INDEX_op_mulu2_i32:
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
- uint32_t a = arg_info(op->args[2])->val;
- uint32_t b = arg_info(op->args[3])->val;
- uint64_t r = (uint64_t)a * b;
- TCGArg rl, rh;
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
-
- rl = op->args[0];
- rh = op->args[1];
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
- continue;
- }
- break;
-
default:
break;

@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
CASE_OP_32_64(muluh):
done = fold_mul_highpart(&ctx, op);
break;
+ case INDEX_op_mulu2_i32:
+ done = fold_mulu2_i32(&ctx, op);
+ break;
CASE_OP_32_64(nand):
done = fold_nand(&ctx, op);
break;
--
2.25.1

Add two additional helpers, fold_add2_i32 and fold_sub2_i32,
which will not be simple wrappers forever.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 70 +++++++++++++++++++++++++++++++-------------------
1 file changed, 44 insertions(+), 26 deletions(-)

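The helper below reassembles the two 32-bit halves of each operand into a 64-bit
value, does the add or subtract there, and splits the result again, so carry and
borrow between the halves come for free from the host arithmetic. A made-up
example:

    /*
     * add2: a = {ah = 0x0, al = 0xffffffff}, b = {bh = 0x0, bl = 0x1}
     *       0x00000000ffffffff + 0x0000000000000001 = 0x0000000100000000
     *       -> low result 0x00000000, high result 0x00000001
     *          (the carry out of the low word shows up in the high word)
     */
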
diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
16
return fold_const2(ctx, op);
16
return fold_const2(ctx, op);
17
}
17
}
18
18
19
+static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
19
+static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
20
+{
20
+{
21
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
21
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
22
+ arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
22
+ arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
23
+ uint32_t al = arg_info(op->args[2])->val;
23
+ uint32_t al = arg_info(op->args[2])->val;
24
+ uint32_t ah = arg_info(op->args[3])->val;
24
+ uint32_t ah = arg_info(op->args[3])->val;
25
+ uint32_t bl = arg_info(op->args[4])->val;
25
+ uint32_t bl = arg_info(op->args[4])->val;
26
+ uint32_t bh = arg_info(op->args[5])->val;
26
+ uint32_t bh = arg_info(op->args[5])->val;
27
+ uint64_t a = ((uint64_t)ah << 32) | al;
27
+ uint64_t a = ((uint64_t)ah << 32) | al;
28
+ uint64_t b = ((uint64_t)bh << 32) | bl;
28
+ uint64_t b = ((uint64_t)bh << 32) | bl;
29
+ TCGArg rl, rh;
29
+ TCGArg rl, rh;
30
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
30
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
31
+
31
+
32
+ if (add) {
32
+ if (add) {
33
+ a += b;
33
+ a += b;
34
+ } else {
34
+ } else {
35
+ a -= b;
35
+ a -= b;
36
+ }
36
+ }
37
+
37
+
38
+ rl = op->args[0];
38
+ rl = op->args[0];
39
+ rh = op->args[1];
39
+ rh = op->args[1];
40
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
40
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
41
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
41
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
42
+ return true;
42
+ return true;
43
+ }
43
+ }
44
+ return false;
44
+ return false;
45
+}
45
+}
46
+
46
+
47
+static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
47
+static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
48
+{
48
+{
49
+ return fold_addsub2_i32(ctx, op, true);
49
+ return fold_addsub2_i32(ctx, op, true);
50
+}
50
+}
51
+
51
+
52
static bool fold_and(OptContext *ctx, TCGOp *op)
52
static bool fold_and(OptContext *ctx, TCGOp *op)
53
{
53
{
54
return fold_const2(ctx, op);
54
return fold_const2(ctx, op);
55
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
55
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
56
return fold_const2(ctx, op);
56
return fold_const2(ctx, op);
57
}
57
}
58
58
59
+static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
59
+static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
60
+{
60
+{
61
+ return fold_addsub2_i32(ctx, op, false);
61
+ return fold_addsub2_i32(ctx, op, false);
62
+}
62
+}
63
+
63
+
64
static bool fold_xor(OptContext *ctx, TCGOp *op)
64
static bool fold_xor(OptContext *ctx, TCGOp *op)
65
{
65
{
66
return fold_const2(ctx, op);
66
return fold_const2(ctx, op);
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
}
68
}
69
break;
69
break;
70
70
71
- case INDEX_op_add2_i32:
71
- case INDEX_op_add2_i32:
72
- case INDEX_op_sub2_i32:
72
- case INDEX_op_sub2_i32:
73
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
73
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
74
- && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
74
- && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
75
- uint32_t al = arg_info(op->args[2])->val;
75
- uint32_t al = arg_info(op->args[2])->val;
76
- uint32_t ah = arg_info(op->args[3])->val;
76
- uint32_t ah = arg_info(op->args[3])->val;
77
- uint32_t bl = arg_info(op->args[4])->val;
77
- uint32_t bl = arg_info(op->args[4])->val;
78
- uint32_t bh = arg_info(op->args[5])->val;
78
- uint32_t bh = arg_info(op->args[5])->val;
79
- uint64_t a = ((uint64_t)ah << 32) | al;
79
- uint64_t a = ((uint64_t)ah << 32) | al;
80
- uint64_t b = ((uint64_t)bh << 32) | bl;
80
- uint64_t b = ((uint64_t)bh << 32) | bl;
81
- TCGArg rl, rh;
81
- TCGArg rl, rh;
82
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
82
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
83
-
83
-
84
- if (opc == INDEX_op_add2_i32) {
84
- if (opc == INDEX_op_add2_i32) {
85
- a += b;
85
- a += b;
86
- } else {
86
- } else {
87
- a -= b;
87
- a -= b;
88
- }
88
- }
89
-
89
-
90
- rl = op->args[0];
90
- rl = op->args[0];
91
- rh = op->args[1];
91
- rh = op->args[1];
92
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
92
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
93
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
93
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
94
- continue;
94
- continue;
95
- }
95
- }
96
- break;
96
- break;
97
97
98
default:
98
default:
99
break;
99
break;
100
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
100
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
101
CASE_OP_32_64_VEC(add):
101
CASE_OP_32_64_VEC(add):
102
done = fold_add(&ctx, op);
102
done = fold_add(&ctx, op);
103
break;
103
break;
104
+ case INDEX_op_add2_i32:
104
+ case INDEX_op_add2_i32:
105
+ done = fold_add2_i32(&ctx, op);
105
+ done = fold_add2_i32(&ctx, op);
106
+ break;
106
+ break;
107
CASE_OP_32_64_VEC(and):
107
CASE_OP_32_64_VEC(and):
108
done = fold_and(&ctx, op);
108
done = fold_and(&ctx, op);
109
break;
109
break;
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
CASE_OP_32_64_VEC(sub):
111
CASE_OP_32_64_VEC(sub):
112
done = fold_sub(&ctx, op);
112
done = fold_sub(&ctx, op);
113
break;
113
break;
114
+ case INDEX_op_sub2_i32:
114
+ case INDEX_op_sub2_i32:
115
+ done = fold_sub2_i32(&ctx, op);
115
+ done = fold_sub2_i32(&ctx, op);
116
+ break;
116
+ break;
117
CASE_OP_32_64_VEC(xor):
117
CASE_OP_32_64_VEC(xor):
118
done = fold_xor(&ctx, op);
118
done = fold_xor(&ctx, op);
119
break;
119
break;
120
--
120
--
121
2.25.1
121
2.25.1
122
122
123
123
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 56 ++++++++++++++++++++++----------------------
1 file changed, 31 insertions(+), 25 deletions(-)

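Two separate simplifications move into the new helper below: a movcond whose
condition folds becomes a plain copy of the selected input, and a movcond whose
two data inputs are the constants 1 and 0 becomes a setcond, with the condition
inverted when the constants are the other way round. Roughly (made-up operands,
for illustration only):

    /*
     * movcond_i32 d, c1, c2, v1, v2, cond   with a foldable cond
     *     -> mov d, v1   (or v2, depending on the folded result)
     * movcond_i32 d, c1, c2, 1, 0, cond
     *     -> setcond_i32 d, c1, c2, cond
     * movcond_i32 d, c1, c2, 0, 1, cond
     *     -> setcond_i32 d, c1, c2, tcg_invert_cond(cond)
     */
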
diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
13
return true;
13
return true;
14
}
14
}
15
15
16
+static bool fold_movcond(OptContext *ctx, TCGOp *op)
16
+static bool fold_movcond(OptContext *ctx, TCGOp *op)
17
+{
17
+{
18
+ TCGOpcode opc = op->opc;
18
+ TCGOpcode opc = op->opc;
19
+ TCGCond cond = op->args[5];
19
+ TCGCond cond = op->args[5];
20
+ int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
20
+ int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
21
+
21
+
22
+ if (i >= 0) {
22
+ if (i >= 0) {
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
24
+ }
24
+ }
25
+
25
+
26
+ if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
26
+ if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
27
+ uint64_t tv = arg_info(op->args[3])->val;
27
+ uint64_t tv = arg_info(op->args[3])->val;
28
+ uint64_t fv = arg_info(op->args[4])->val;
28
+ uint64_t fv = arg_info(op->args[4])->val;
29
+
29
+
30
+ opc = (opc == INDEX_op_movcond_i32
30
+ opc = (opc == INDEX_op_movcond_i32
31
+ ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
31
+ ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
32
+
32
+
33
+ if (tv == 1 && fv == 0) {
33
+ if (tv == 1 && fv == 0) {
34
+ op->opc = opc;
34
+ op->opc = opc;
35
+ op->args[3] = cond;
35
+ op->args[3] = cond;
36
+ } else if (fv == 1 && tv == 0) {
36
+ } else if (fv == 1 && tv == 0) {
37
+ op->opc = opc;
37
+ op->opc = opc;
38
+ op->args[3] = tcg_invert_cond(cond);
38
+ op->args[3] = tcg_invert_cond(cond);
39
+ }
39
+ }
40
+ }
40
+ }
41
+ return false;
41
+ return false;
42
+}
42
+}
43
+
43
+
44
static bool fold_mul(OptContext *ctx, TCGOp *op)
44
static bool fold_mul(OptContext *ctx, TCGOp *op)
45
{
45
{
46
return fold_const2(ctx, op);
46
return fold_const2(ctx, op);
47
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
47
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
48
}
48
}
49
break;
49
break;
50
50
51
- CASE_OP_32_64(movcond):
51
- CASE_OP_32_64(movcond):
52
- i = do_constant_folding_cond(opc, op->args[1],
52
- i = do_constant_folding_cond(opc, op->args[1],
53
- op->args[2], op->args[5]);
53
- op->args[2], op->args[5]);
54
- if (i >= 0) {
54
- if (i >= 0) {
55
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
55
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
56
- continue;
56
- continue;
57
- }
57
- }
58
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
58
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
59
- uint64_t tv = arg_info(op->args[3])->val;
59
- uint64_t tv = arg_info(op->args[3])->val;
60
- uint64_t fv = arg_info(op->args[4])->val;
60
- uint64_t fv = arg_info(op->args[4])->val;
61
- TCGCond cond = op->args[5];
61
- TCGCond cond = op->args[5];
62
-
62
-
63
- if (fv == 1 && tv == 0) {
63
- if (fv == 1 && tv == 0) {
64
- cond = tcg_invert_cond(cond);
64
- cond = tcg_invert_cond(cond);
65
- } else if (!(tv == 1 && fv == 0)) {
65
- } else if (!(tv == 1 && fv == 0)) {
66
- break;
66
- break;
67
- }
67
- }
68
- op->args[3] = cond;
68
- op->args[3] = cond;
69
- op->opc = opc = (opc == INDEX_op_movcond_i32
69
- op->opc = opc = (opc == INDEX_op_movcond_i32
70
- ? INDEX_op_setcond_i32
70
- ? INDEX_op_setcond_i32
71
- : INDEX_op_setcond_i64);
71
- : INDEX_op_setcond_i64);
72
- }
72
- }
73
- break;
73
- break;
74
-
74
-
75
-
75
-
76
default:
76
default:
77
break;
77
break;
78
78
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
case INDEX_op_mb:
80
case INDEX_op_mb:
81
done = fold_mb(&ctx, op);
81
done = fold_mb(&ctx, op);
82
break;
82
break;
83
+ CASE_OP_32_64(movcond):
83
+ CASE_OP_32_64(movcond):
84
+ done = fold_movcond(&ctx, op);
84
+ done = fold_movcond(&ctx, op);
85
+ break;
85
+ break;
86
CASE_OP_32_64(mul):
86
CASE_OP_32_64(mul):
87
done = fold_mul(&ctx, op);
87
done = fold_mul(&ctx, op);
88
break;
88
break;
89
--
89
--
90
2.25.1
90
2.25.1
91
91
92
92
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
4
---
5
tcg/optimize.c | 39 ++++++++++++++++++++++-----------------
5
tcg/optimize.c | 39 ++++++++++++++++++++++-----------------
6
1 file changed, 22 insertions(+), 17 deletions(-)
6
1 file changed, 22 insertions(+), 17 deletions(-)
7
7
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
13
return fold_const2(ctx, op);
13
return fold_const2(ctx, op);
14
}
14
}
15
15
16
+static bool fold_extract2(OptContext *ctx, TCGOp *op)
16
+static bool fold_extract2(OptContext *ctx, TCGOp *op)
17
+{
17
+{
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
19
+ uint64_t v1 = arg_info(op->args[1])->val;
19
+ uint64_t v1 = arg_info(op->args[1])->val;
20
+ uint64_t v2 = arg_info(op->args[2])->val;
20
+ uint64_t v2 = arg_info(op->args[2])->val;
21
+ int shr = op->args[3];
21
+ int shr = op->args[3];
22
+
22
+
23
+ if (op->opc == INDEX_op_extract2_i64) {
23
+ if (op->opc == INDEX_op_extract2_i64) {
24
+ v1 >>= shr;
24
+ v1 >>= shr;
25
+ v2 <<= 64 - shr;
25
+ v2 <<= 64 - shr;
26
+ } else {
26
+ } else {
27
+ v1 = (uint32_t)v1 >> shr;
27
+ v1 = (uint32_t)v1 >> shr;
28
+ v2 = (int32_t)v2 << (32 - shr);
28
+ v2 = (int32_t)v2 << (32 - shr);
29
+ }
29
+ }
30
+ return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
30
+ return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
31
+ }
31
+ }
32
+ return false;
32
+ return false;
33
+}
33
+}
34
+
34
+
35
static bool fold_exts(OptContext *ctx, TCGOp *op)
35
static bool fold_exts(OptContext *ctx, TCGOp *op)
36
{
36
{
37
return fold_const1(ctx, op);
37
return fold_const1(ctx, op);
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
39
}
39
}
40
break;
40
break;
41
41
42
- CASE_OP_32_64(extract2):
42
- CASE_OP_32_64(extract2):
43
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
43
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
44
- uint64_t v1 = arg_info(op->args[1])->val;
44
- uint64_t v1 = arg_info(op->args[1])->val;
45
- uint64_t v2 = arg_info(op->args[2])->val;
45
- uint64_t v2 = arg_info(op->args[2])->val;
46
- int shr = op->args[3];
46
- int shr = op->args[3];
47
-
47
-
48
- if (opc == INDEX_op_extract2_i64) {
48
- if (opc == INDEX_op_extract2_i64) {
49
- tmp = (v1 >> shr) | (v2 << (64 - shr));
49
- tmp = (v1 >> shr) | (v2 << (64 - shr));
50
- } else {
50
- } else {
51
- tmp = (int32_t)(((uint32_t)v1 >> shr) |
51
- tmp = (int32_t)(((uint32_t)v1 >> shr) |
52
- ((uint32_t)v2 << (32 - shr)));
52
- ((uint32_t)v2 << (32 - shr)));
53
- }
53
- }
54
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
54
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
55
- continue;
55
- continue;
56
- }
56
- }
57
- break;
57
- break;
58
-
58
-
59
default:
59
default:
60
break;
60
break;
61
61
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
63
CASE_OP_32_64(eqv):
63
CASE_OP_32_64(eqv):
64
done = fold_eqv(&ctx, op);
64
done = fold_eqv(&ctx, op);
65
break;
65
break;
66
+ CASE_OP_32_64(extract2):
66
+ CASE_OP_32_64(extract2):
67
+ done = fold_extract2(&ctx, op);
67
+ done = fold_extract2(&ctx, op);
68
+ break;
68
+ break;
69
CASE_OP_32_64(ext8s):
69
CASE_OP_32_64(ext8s):
70
CASE_OP_32_64(ext16s):
70
CASE_OP_32_64(ext16s):
71
case INDEX_op_ext32s_i64:
71
case INDEX_op_ext32s_i64:
72
--
72
--
73
2.25.1
73
2.25.1
74
74
75
75
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
4
---
5
tcg/optimize.c | 48 ++++++++++++++++++++++++++++++------------------
5
tcg/optimize.c | 48 ++++++++++++++++++++++++++++++------------------
6
1 file changed, 30 insertions(+), 18 deletions(-)
6
1 file changed, 30 insertions(+), 18 deletions(-)
7
7
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
13
return fold_const2(ctx, op);
13
return fold_const2(ctx, op);
14
}
14
}
15
15
16
+static bool fold_extract(OptContext *ctx, TCGOp *op)
16
+static bool fold_extract(OptContext *ctx, TCGOp *op)
17
+{
17
+{
18
+ if (arg_is_const(op->args[1])) {
18
+ if (arg_is_const(op->args[1])) {
19
+ uint64_t t;
19
+ uint64_t t;
20
+
20
+
21
+ t = arg_info(op->args[1])->val;
21
+ t = arg_info(op->args[1])->val;
22
+ t = extract64(t, op->args[2], op->args[3]);
22
+ t = extract64(t, op->args[2], op->args[3]);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
24
+ }
24
+ }
25
+ return false;
25
+ return false;
26
+}
26
+}
27
+
27
+
28
static bool fold_extract2(OptContext *ctx, TCGOp *op)
28
static bool fold_extract2(OptContext *ctx, TCGOp *op)
29
{
29
{
30
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
30
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
31
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
31
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
32
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
32
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
33
}
33
}
34
34
35
+static bool fold_sextract(OptContext *ctx, TCGOp *op)
35
+static bool fold_sextract(OptContext *ctx, TCGOp *op)
36
+{
36
+{
37
+ if (arg_is_const(op->args[1])) {
37
+ if (arg_is_const(op->args[1])) {
38
+ uint64_t t;
38
+ uint64_t t;
39
+
39
+
40
+ t = arg_info(op->args[1])->val;
40
+ t = arg_info(op->args[1])->val;
41
+ t = sextract64(t, op->args[2], op->args[3]);
41
+ t = sextract64(t, op->args[2], op->args[3]);
42
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
42
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
43
+ }
43
+ }
44
+ return false;
44
+ return false;
45
+}
45
+}
46
+
46
+
47
static bool fold_shift(OptContext *ctx, TCGOp *op)
47
static bool fold_shift(OptContext *ctx, TCGOp *op)
48
{
48
{
49
return fold_const2(ctx, op);
49
return fold_const2(ctx, op);
50
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
50
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
51
}
51
}
52
break;
52
break;
53
53
54
- CASE_OP_32_64(extract):
54
- CASE_OP_32_64(extract):
55
- if (arg_is_const(op->args[1])) {
55
- if (arg_is_const(op->args[1])) {
56
- tmp = extract64(arg_info(op->args[1])->val,
56
- tmp = extract64(arg_info(op->args[1])->val,
57
- op->args[2], op->args[3]);
57
- op->args[2], op->args[3]);
58
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
58
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
59
- continue;
59
- continue;
60
- }
60
- }
61
- break;
61
- break;
62
-
62
-
63
- CASE_OP_32_64(sextract):
63
- CASE_OP_32_64(sextract):
64
- if (arg_is_const(op->args[1])) {
64
- if (arg_is_const(op->args[1])) {
65
- tmp = sextract64(arg_info(op->args[1])->val,
65
- tmp = sextract64(arg_info(op->args[1])->val,
66
- op->args[2], op->args[3]);
66
- op->args[2], op->args[3]);
67
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
67
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
68
- continue;
68
- continue;
69
- }
69
- }
70
- break;
70
- break;
71
-
71
-
72
default:
72
default:
73
break;
73
break;
74
74
75
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
75
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
76
CASE_OP_32_64(eqv):
76
CASE_OP_32_64(eqv):
77
done = fold_eqv(&ctx, op);
77
done = fold_eqv(&ctx, op);
78
break;
78
break;
79
+ CASE_OP_32_64(extract):
79
+ CASE_OP_32_64(extract):
80
+ done = fold_extract(&ctx, op);
80
+ done = fold_extract(&ctx, op);
81
+ break;
81
+ break;
82
CASE_OP_32_64(extract2):
82
CASE_OP_32_64(extract2):
83
done = fold_extract2(&ctx, op);
83
done = fold_extract2(&ctx, op);
84
break;
84
break;
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
case INDEX_op_setcond2_i32:
86
case INDEX_op_setcond2_i32:
87
done = fold_setcond2(&ctx, op);
87
done = fold_setcond2(&ctx, op);
88
break;
88
break;
89
+ CASE_OP_32_64(sextract):
89
+ CASE_OP_32_64(sextract):
90
+ done = fold_sextract(&ctx, op);
90
+ done = fold_sextract(&ctx, op);
91
+ break;
91
+ break;
92
CASE_OP_32_64_VEC(sub):
92
CASE_OP_32_64_VEC(sub):
93
done = fold_sub(&ctx, op);
93
done = fold_sub(&ctx, op);
94
break;
94
break;
95
--
95
--
96
2.25.1
96
2.25.1
97
97
98
98
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
4
---
5
tcg/optimize.c | 25 +++++++++++++++----------
5
tcg/optimize.c | 25 +++++++++++++++----------
6
1 file changed, 15 insertions(+), 10 deletions(-)
6
1 file changed, 15 insertions(+), 10 deletions(-)
7
7
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
12
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
13
return fold_const1(ctx, op);
13
return fold_const1(ctx, op);
14
}
14
}
15
15
16
+static bool fold_deposit(OptContext *ctx, TCGOp *op)
16
+static bool fold_deposit(OptContext *ctx, TCGOp *op)
17
+{
17
+{
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
19
+ uint64_t t1 = arg_info(op->args[1])->val;
19
+ uint64_t t1 = arg_info(op->args[1])->val;
20
+ uint64_t t2 = arg_info(op->args[2])->val;
20
+ uint64_t t2 = arg_info(op->args[2])->val;
21
+
21
+
22
+ t1 = deposit64(t1, op->args[3], op->args[4], t2);
22
+ t1 = deposit64(t1, op->args[3], op->args[4], t2);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
24
+ }
24
+ }
25
+ return false;
25
+ return false;
26
+}
26
+}
27
+
27
+
28
static bool fold_divide(OptContext *ctx, TCGOp *op)
28
static bool fold_divide(OptContext *ctx, TCGOp *op)
29
{
29
{
30
return fold_const2(ctx, op);
30
return fold_const2(ctx, op);
31
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
31
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
32
}
32
}
33
break;
33
break;
34
34
35
- CASE_OP_32_64(deposit):
35
- CASE_OP_32_64(deposit):
36
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
36
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
37
- tmp = deposit64(arg_info(op->args[1])->val,
37
- tmp = deposit64(arg_info(op->args[1])->val,
38
- op->args[3], op->args[4],
38
- op->args[3], op->args[4],
39
- arg_info(op->args[2])->val);
39
- arg_info(op->args[2])->val);
40
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
40
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
41
- continue;
41
- continue;
42
- }
42
- }
43
- break;
43
- break;
44
-
44
-
45
default:
45
default:
46
break;
46
break;
47
47
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
CASE_OP_32_64(ctpop):
49
CASE_OP_32_64(ctpop):
50
done = fold_ctpop(&ctx, op);
50
done = fold_ctpop(&ctx, op);
51
break;
51
break;
52
+ CASE_OP_32_64(deposit):
52
+ CASE_OP_32_64(deposit):
53
+ done = fold_deposit(&ctx, op);
53
+ done = fold_deposit(&ctx, op);
54
+ break;
54
+ break;
55
CASE_OP_32_64(div):
55
CASE_OP_32_64(div):
56
CASE_OP_32_64(divu):
56
CASE_OP_32_64(divu):
57
done = fold_divide(&ctx, op);
57
done = fold_divide(&ctx, op);
58
--
58
--
59
2.25.1
59
2.25.1
60
60
61
61
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
4
---
5
tcg/optimize.c | 32 ++++++++++++++++++--------------
5
tcg/optimize.c | 32 ++++++++++++++++++--------------
6
1 file changed, 18 insertions(+), 14 deletions(-)
6
1 file changed, 18 insertions(+), 14 deletions(-)
7
7
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
12
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
13
return true;
13
return true;
14
}
14
}
15
15
16
+static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
16
+static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
17
+{
17
+{
18
+ if (arg_is_const(op->args[1])) {
18
+ if (arg_is_const(op->args[1])) {
19
+ uint64_t t = arg_info(op->args[1])->val;
19
+ uint64_t t = arg_info(op->args[1])->val;
20
+
20
+
21
+ if (t != 0) {
21
+ if (t != 0) {
22
+ t = do_constant_folding(op->opc, t, 0);
22
+ t = do_constant_folding(op->opc, t, 0);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
24
+ }
24
+ }
25
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
25
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
26
+ }
26
+ }
27
+ return false;
27
+ return false;
28
+}
28
+}
29
+
29
+
30
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
30
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
31
{
31
{
32
return fold_const1(ctx, op);
32
return fold_const1(ctx, op);
33
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
33
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
34
}
34
}
35
break;
35
break;
36
36
37
- CASE_OP_32_64(clz):
37
- CASE_OP_32_64(clz):
38
- CASE_OP_32_64(ctz):
38
- CASE_OP_32_64(ctz):
39
- if (arg_is_const(op->args[1])) {
39
- if (arg_is_const(op->args[1])) {
40
- TCGArg v = arg_info(op->args[1])->val;
40
- TCGArg v = arg_info(op->args[1])->val;
41
- if (v != 0) {
41
- if (v != 0) {
42
- tmp = do_constant_folding(opc, v, 0);
42
- tmp = do_constant_folding(opc, v, 0);
43
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
43
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
44
- } else {
44
- } else {
45
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
45
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
46
- }
46
- }
47
- continue;
47
- continue;
48
- }
48
- }
49
- break;
49
- break;
50
-
50
-
51
default:
51
default:
52
break;
52
break;
53
53
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
55
case INDEX_op_brcond2_i32:
55
case INDEX_op_brcond2_i32:
56
done = fold_brcond2(&ctx, op);
56
done = fold_brcond2(&ctx, op);
57
break;
57
break;
58
+ CASE_OP_32_64(clz):
58
+ CASE_OP_32_64(clz):
59
+ CASE_OP_32_64(ctz):
59
+ CASE_OP_32_64(ctz):
60
+ done = fold_count_zeros(&ctx, op);
60
+ done = fold_count_zeros(&ctx, op);
61
+ break;
61
+ break;
62
CASE_OP_32_64(ctpop):
62
CASE_OP_32_64(ctpop):
63
done = fold_ctpop(&ctx, op);
63
done = fold_ctpop(&ctx, op);
64
break;
64
break;
65
--
65
--
66
2.25.1
66
2.25.1
67
67
68
68
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
4
---
5
tcg/optimize.c | 27 ++++++++++++++++-----------
5
tcg/optimize.c | 27 ++++++++++++++++-----------
6
1 file changed, 16 insertions(+), 11 deletions(-)
6
1 file changed, 16 insertions(+), 11 deletions(-)
7
7
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
12
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
13
return false;
13
return false;
14
}
14
}
15
15
16
+static bool fold_bswap(OptContext *ctx, TCGOp *op)
16
+static bool fold_bswap(OptContext *ctx, TCGOp *op)
17
+{
17
+{
18
+ if (arg_is_const(op->args[1])) {
18
+ if (arg_is_const(op->args[1])) {
19
+ uint64_t t = arg_info(op->args[1])->val;
19
+ uint64_t t = arg_info(op->args[1])->val;
20
+
20
+
21
+ t = do_constant_folding(op->opc, t, op->args[2]);
21
+ t = do_constant_folding(op->opc, t, op->args[2]);
22
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
22
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
23
+ }
23
+ }
24
+ return false;
24
+ return false;
25
+}
25
+}
26
+
26
+
27
static bool fold_call(OptContext *ctx, TCGOp *op)
27
static bool fold_call(OptContext *ctx, TCGOp *op)
28
{
28
{
29
TCGContext *s = ctx->tcg;
29
TCGContext *s = ctx->tcg;
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
31
}
31
}
32
break;
32
break;
33
33
34
- CASE_OP_32_64(bswap16):
34
- CASE_OP_32_64(bswap16):
35
- CASE_OP_32_64(bswap32):
35
- CASE_OP_32_64(bswap32):
36
- case INDEX_op_bswap64_i64:
36
- case INDEX_op_bswap64_i64:
37
- if (arg_is_const(op->args[1])) {
37
- if (arg_is_const(op->args[1])) {
38
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
38
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
39
- op->args[2]);
39
- op->args[2]);
40
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
40
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
41
- continue;
41
- continue;
42
- }
42
- }
43
- break;
43
- break;
44
-
44
-
45
default:
45
default:
46
break;
46
break;
47
47
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
case INDEX_op_brcond2_i32:
49
case INDEX_op_brcond2_i32:
50
done = fold_brcond2(&ctx, op);
50
done = fold_brcond2(&ctx, op);
51
break;
51
break;
52
+ CASE_OP_32_64(bswap16):
52
+ CASE_OP_32_64(bswap16):
53
+ CASE_OP_32_64(bswap32):
53
+ CASE_OP_32_64(bswap32):
54
+ case INDEX_op_bswap64_i64:
54
+ case INDEX_op_bswap64_i64:
55
+ done = fold_bswap(&ctx, op);
55
+ done = fold_bswap(&ctx, op);
56
+ break;
56
+ break;
57
CASE_OP_32_64(clz):
57
CASE_OP_32_64(clz):
58
CASE_OP_32_64(ctz):
58
CASE_OP_32_64(ctz):
59
done = fold_count_zeros(&ctx, op);
59
done = fold_count_zeros(&ctx, op);
60
--
60
--
61
2.25.1
61
2.25.1
62
62
63
63
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
4
---
5
tcg/optimize.c | 53 +++++++++++++++++++++++++++++---------------------
5
tcg/optimize.c | 53 +++++++++++++++++++++++++++++---------------------
6
1 file changed, 31 insertions(+), 22 deletions(-)
6
1 file changed, 31 insertions(+), 22 deletions(-)
7
7
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
10
--- a/tcg/optimize.c
11
+++ b/tcg/optimize.c
11
+++ b/tcg/optimize.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
12
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
13
return fold_const2(ctx, op);
13
return fold_const2(ctx, op);
14
}
14
}
15
15
16
+static bool fold_dup(OptContext *ctx, TCGOp *op)
16
+static bool fold_dup(OptContext *ctx, TCGOp *op)
17
+{
17
+{
18
+ if (arg_is_const(op->args[1])) {
18
+ if (arg_is_const(op->args[1])) {
19
+ uint64_t t = arg_info(op->args[1])->val;
19
+ uint64_t t = arg_info(op->args[1])->val;
20
+ t = dup_const(TCGOP_VECE(op), t);
20
+ t = dup_const(TCGOP_VECE(op), t);
21
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
21
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
22
+ }
22
+ }
23
+ return false;
23
+ return false;
24
+}
24
+}
25
+
25
+
26
+static bool fold_dup2(OptContext *ctx, TCGOp *op)
26
+static bool fold_dup2(OptContext *ctx, TCGOp *op)
27
+{
27
+{
28
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
28
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
29
+ uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
29
+ uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
30
+ arg_info(op->args[2])->val);
30
+ arg_info(op->args[2])->val);
31
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
31
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
32
+ }
32
+ }
33
+
33
+
34
+ if (args_are_copies(op->args[1], op->args[2])) {
34
+ if (args_are_copies(op->args[1], op->args[2])) {
35
+ op->opc = INDEX_op_dup_vec;
35
+ op->opc = INDEX_op_dup_vec;
36
+ TCGOP_VECE(op) = MO_32;
36
+ TCGOP_VECE(op) = MO_32;
37
+ }
37
+ }
38
+ return false;
38
+ return false;
39
+}
39
+}
40
+
40
+
41
static bool fold_eqv(OptContext *ctx, TCGOp *op)
41
static bool fold_eqv(OptContext *ctx, TCGOp *op)
42
{
42
{
43
return fold_const2(ctx, op);
43
return fold_const2(ctx, op);
44
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
44
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
45
done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
45
done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
46
break;
46
break;
47
47
48
- case INDEX_op_dup_vec:
48
- case INDEX_op_dup_vec:
49
- if (arg_is_const(op->args[1])) {
49
- if (arg_is_const(op->args[1])) {
50
- tmp = arg_info(op->args[1])->val;
50
- tmp = arg_info(op->args[1])->val;
51
- tmp = dup_const(TCGOP_VECE(op), tmp);
51
- tmp = dup_const(TCGOP_VECE(op), tmp);
52
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
52
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
53
- continue;
53
- continue;
54
- }
54
- }
55
- break;
55
- break;
56
-
56
-
57
- case INDEX_op_dup2_vec:
57
- case INDEX_op_dup2_vec:
58
- assert(TCG_TARGET_REG_BITS == 32);
58
- assert(TCG_TARGET_REG_BITS == 32);
59
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
59
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
60
- tcg_opt_gen_movi(&ctx, op, op->args[0],
60
- tcg_opt_gen_movi(&ctx, op, op->args[0],
61
- deposit64(arg_info(op->args[1])->val, 32, 32,
61
- deposit64(arg_info(op->args[1])->val, 32, 32,
62
- arg_info(op->args[2])->val));
62
- arg_info(op->args[2])->val));
63
- continue;
63
- continue;
64
- } else if (args_are_copies(op->args[1], op->args[2])) {
64
- } else if (args_are_copies(op->args[1], op->args[2])) {
65
- op->opc = INDEX_op_dup_vec;
65
- op->opc = INDEX_op_dup_vec;
66
- TCGOP_VECE(op) = MO_32;
66
- TCGOP_VECE(op) = MO_32;
67
- }
67
- }
68
- break;
68
- break;
69
-
69
-
70
default:
70
default:
71
break;
71
break;
72
72
73
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
73
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
74
CASE_OP_32_64(divu):
74
CASE_OP_32_64(divu):
75
done = fold_divide(&ctx, op);
75
done = fold_divide(&ctx, op);
76
break;
76
break;
77
+ case INDEX_op_dup_vec:
77
+ case INDEX_op_dup_vec:
78
+ done = fold_dup(&ctx, op);
78
+ done = fold_dup(&ctx, op);
79
+ break;
79
+ break;
80
+ case INDEX_op_dup2_vec:
80
+ case INDEX_op_dup2_vec:
81
+ done = fold_dup2(&ctx, op);
81
+ done = fold_dup2(&ctx, op);
82
+ break;
82
+ break;
83
CASE_OP_32_64(eqv):
83
CASE_OP_32_64(eqv):
84
done = fold_eqv(&ctx, op);
84
done = fold_eqv(&ctx, op);
85
break;
85
break;
86
--
86
--
87
2.25.1
87
2.25.1
88
88
89
89
1
This is the final entry in the main switch that was in a
1
This is the final entry in the main switch that was in a
2
different form. After this, we have the option to convert
2
different form. After this, we have the option to convert
3
the switch into a function dispatch table.
3
the switch into a function dispatch table.
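
As a rough sketch only (nothing in this series adds it, and the table
name and contents below are hypothetical), such a dispatch table could
look like:

    typedef bool (*FoldFn)(OptContext *ctx, TCGOp *op);

    /* One entry per TCGOpcode; opcodes without a fold helper stay NULL. */
    static const FoldFn fold_table[NB_OPS] = {
        [INDEX_op_add_i32] = fold_add,
        [INDEX_op_add_i64] = fold_add,
        [INDEX_op_mb]      = fold_mb,
        /* ... */
    };

    /* In tcg_optimize(), replacing the switch: */
    FoldFn fn = fold_table[op->opc];
    done = fn ? fn(&ctx, op) : false;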
4
4
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
tcg/optimize.c | 27 ++++++++++++++-------------
9
tcg/optimize.c | 27 ++++++++++++++-------------
10
1 file changed, 14 insertions(+), 13 deletions(-)
10
1 file changed, 14 insertions(+), 13 deletions(-)
11
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
17
return true;
17
return true;
18
}
18
}
19
19
20
+static bool fold_mov(OptContext *ctx, TCGOp *op)
20
+static bool fold_mov(OptContext *ctx, TCGOp *op)
21
+{
21
+{
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
23
+}
23
+}
24
+
24
+
25
static bool fold_movcond(OptContext *ctx, TCGOp *op)
25
static bool fold_movcond(OptContext *ctx, TCGOp *op)
26
{
26
{
27
TCGOpcode opc = op->opc;
27
TCGOpcode opc = op->opc;
28
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
28
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
29
break;
29
break;
30
}
30
}
31
31
32
- /* Propagate constants through copy operations and do constant
32
- /* Propagate constants through copy operations and do constant
33
- folding. Constants will be substituted to arguments by register
33
- folding. Constants will be substituted to arguments by register
34
- allocator where needed and possible. Also detect copies. */
34
- allocator where needed and possible. Also detect copies. */
35
+ /*
35
+ /*
36
+ * Process each opcode.
36
+ * Process each opcode.
37
+ * Sorted alphabetically by opcode as much as possible.
37
+ * Sorted alphabetically by opcode as much as possible.
38
+ */
38
+ */
39
switch (opc) {
39
switch (opc) {
40
- CASE_OP_32_64_VEC(mov):
40
- CASE_OP_32_64_VEC(mov):
41
- done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
41
- done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
42
- break;
42
- break;
43
-
43
-
44
- default:
44
- default:
45
- break;
45
- break;
46
-
46
-
47
- /* ---------------------------------------------------------- */
47
- /* ---------------------------------------------------------- */
48
- /* Sorted alphabetically by opcode as much as possible. */
48
- /* Sorted alphabetically by opcode as much as possible. */
49
-
49
-
50
CASE_OP_32_64_VEC(add):
50
CASE_OP_32_64_VEC(add):
51
done = fold_add(&ctx, op);
51
done = fold_add(&ctx, op);
52
break;
52
break;
53
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
53
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
54
case INDEX_op_mb:
54
case INDEX_op_mb:
55
done = fold_mb(&ctx, op);
55
done = fold_mb(&ctx, op);
56
break;
56
break;
57
+ CASE_OP_32_64_VEC(mov):
57
+ CASE_OP_32_64_VEC(mov):
58
+ done = fold_mov(&ctx, op);
58
+ done = fold_mov(&ctx, op);
59
+ break;
59
+ break;
60
CASE_OP_32_64(movcond):
60
CASE_OP_32_64(movcond):
61
done = fold_movcond(&ctx, op);
61
done = fold_movcond(&ctx, op);
62
break;
62
break;
63
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
63
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
64
CASE_OP_32_64_VEC(xor):
64
CASE_OP_32_64_VEC(xor):
65
done = fold_xor(&ctx, op);
65
done = fold_xor(&ctx, op);
66
break;
66
break;
67
+ default:
67
+ default:
68
+ break;
68
+ break;
69
}
69
}
70
70
71
if (!done) {
71
if (!done) {
72
--
72
--
73
2.25.1
73
2.25.1
74
74
75
75
1
Pull the "op r, a, a => movi r, 0" optimization into a function,
1
Pull the "op r, a, a => movi r, 0" optimization into a function,
2
and use it in the outer opcode fold functions.
2
and use it in the outer opcode fold functions.
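
For reference, the identities being exploited, as a minimal C
illustration (the helper name is made up):

    #include <assert.h>
    #include <stdint.h>

    /* Ops whose two equal inputs always produce zero. */
    static void xx_to_i_examples(uint64_t a)
    {
        assert((a - a) == 0);   /* sub  */
        assert((a ^ a) == 0);   /* xor  */
        assert((a & ~a) == 0);  /* andc */
    }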
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 41 ++++++++++++++++++++++++-----------------
8
tcg/optimize.c | 41 ++++++++++++++++++++++++-----------------
9
1 file changed, 24 insertions(+), 17 deletions(-)
9
1 file changed, 24 insertions(+), 17 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
16
return false;
16
return false;
17
}
17
}
18
18
19
+/* If the binary operation has both arguments equal, fold to @i. */
19
+/* If the binary operation has both arguments equal, fold to @i. */
20
+static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
20
+static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
21
+{
21
+{
22
+ if (args_are_copies(op->args[1], op->args[2])) {
22
+ if (args_are_copies(op->args[1], op->args[2])) {
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
+ }
24
+ }
25
+ return false;
25
+ return false;
26
+}
26
+}
27
+
27
+
28
/*
28
/*
29
* These outermost fold_<op> functions are sorted alphabetically.
29
* These outermost fold_<op> functions are sorted alphabetically.
30
*/
30
*/
31
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
31
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
32
32
33
static bool fold_andc(OptContext *ctx, TCGOp *op)
33
static bool fold_andc(OptContext *ctx, TCGOp *op)
34
{
34
{
35
- return fold_const2(ctx, op);
35
- return fold_const2(ctx, op);
36
+ if (fold_const2(ctx, op) ||
36
+ if (fold_const2(ctx, op) ||
37
+ fold_xx_to_i(ctx, op, 0)) {
37
+ fold_xx_to_i(ctx, op, 0)) {
38
+ return true;
38
+ return true;
39
+ }
39
+ }
40
+ return false;
40
+ return false;
41
}
41
}
42
42
43
static bool fold_brcond(OptContext *ctx, TCGOp *op)
43
static bool fold_brcond(OptContext *ctx, TCGOp *op)
44
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
44
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
45
45
46
static bool fold_sub(OptContext *ctx, TCGOp *op)
46
static bool fold_sub(OptContext *ctx, TCGOp *op)
47
{
47
{
48
- return fold_const2(ctx, op);
48
- return fold_const2(ctx, op);
49
+ if (fold_const2(ctx, op) ||
49
+ if (fold_const2(ctx, op) ||
50
+ fold_xx_to_i(ctx, op, 0)) {
50
+ fold_xx_to_i(ctx, op, 0)) {
51
+ return true;
51
+ return true;
52
+ }
52
+ }
53
+ return false;
53
+ return false;
54
}
54
}
55
55
56
static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
56
static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
57
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
57
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
58
58
59
static bool fold_xor(OptContext *ctx, TCGOp *op)
59
static bool fold_xor(OptContext *ctx, TCGOp *op)
60
{
60
{
61
- return fold_const2(ctx, op);
61
- return fold_const2(ctx, op);
62
+ if (fold_const2(ctx, op) ||
62
+ if (fold_const2(ctx, op) ||
63
+ fold_xx_to_i(ctx, op, 0)) {
63
+ fold_xx_to_i(ctx, op, 0)) {
64
+ return true;
64
+ return true;
65
+ }
65
+ }
66
+ return false;
66
+ return false;
67
}
67
}
68
68
69
/* Propagate constants and copies, fold constant expressions. */
69
/* Propagate constants and copies, fold constant expressions. */
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
71
break;
71
break;
72
}
72
}
73
73
74
- /* Simplify expression for "op r, a, a => movi r, 0" cases */
74
- /* Simplify expression for "op r, a, a => movi r, 0" cases */
75
- switch (opc) {
75
- switch (opc) {
76
- CASE_OP_32_64_VEC(andc):
76
- CASE_OP_32_64_VEC(andc):
77
- CASE_OP_32_64_VEC(sub):
77
- CASE_OP_32_64_VEC(sub):
78
- CASE_OP_32_64_VEC(xor):
78
- CASE_OP_32_64_VEC(xor):
79
- if (args_are_copies(op->args[1], op->args[2])) {
79
- if (args_are_copies(op->args[1], op->args[2])) {
80
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
80
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
81
- continue;
81
- continue;
82
- }
82
- }
83
- break;
83
- break;
84
- default:
84
- default:
85
- break;
85
- break;
86
- }
86
- }
87
-
87
-
88
/*
88
/*
89
* Process each opcode.
89
* Process each opcode.
90
* Sorted alphabetically by opcode as much as possible.
90
* Sorted alphabetically by opcode as much as possible.
91
--
91
--
92
2.25.1
92
2.25.1
93
93
94
94
1
Pull the "op r, a, a => mov r, a" optimization into a function,
1
Pull the "op r, a, a => mov r, a" optimization into a function,
2
and use it in the outer opcode fold functions.
2
and use it in the outer opcode fold functions.
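
For reference, the identities here are the idempotent ones, shown as a
minimal C illustration (the helper name is made up):

    #include <assert.h>
    #include <stdint.h>

    /* Ops whose two equal inputs give back that same input. */
    static void xx_to_x_examples(uint64_t a)
    {
        assert((a & a) == a);   /* and */
        assert((a | a) == a);   /* or  */
    }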
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 39 ++++++++++++++++++++++++---------------
8
tcg/optimize.c | 39 ++++++++++++++++++++++++---------------
9
1 file changed, 24 insertions(+), 15 deletions(-)
9
1 file changed, 24 insertions(+), 15 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
16
return false;
16
return false;
17
}
17
}
18
18
19
+/* If the binary operation has both arguments equal, fold to identity. */
19
+/* If the binary operation has both arguments equal, fold to identity. */
20
+static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
20
+static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
21
+{
21
+{
22
+ if (args_are_copies(op->args[1], op->args[2])) {
22
+ if (args_are_copies(op->args[1], op->args[2])) {
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
24
+ }
24
+ }
25
+ return false;
25
+ return false;
26
+}
26
+}
27
+
27
+
28
/*
28
/*
29
* These outermost fold_<op> functions are sorted alphabetically.
29
* These outermost fold_<op> functions are sorted alphabetically.
30
+ *
30
+ *
31
+ * The ordering of the transformations should be:
31
+ * The ordering of the transformations should be:
32
+ * 1) those that produce a constant
32
+ * 1) those that produce a constant
33
+ * 2) those that produce a copy
33
+ * 2) those that produce a copy
34
+ * 3) those that produce information about the result value.
34
+ * 3) those that produce information about the result value.
35
*/
35
*/
36
36
37
static bool fold_add(OptContext *ctx, TCGOp *op)
37
static bool fold_add(OptContext *ctx, TCGOp *op)
38
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
38
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
39
39
40
static bool fold_and(OptContext *ctx, TCGOp *op)
40
static bool fold_and(OptContext *ctx, TCGOp *op)
41
{
41
{
42
- return fold_const2(ctx, op);
42
- return fold_const2(ctx, op);
43
+ if (fold_const2(ctx, op) ||
43
+ if (fold_const2(ctx, op) ||
44
+ fold_xx_to_x(ctx, op)) {
44
+ fold_xx_to_x(ctx, op)) {
45
+ return true;
45
+ return true;
46
+ }
46
+ }
47
+ return false;
47
+ return false;
48
}
48
}
49
49
50
static bool fold_andc(OptContext *ctx, TCGOp *op)
50
static bool fold_andc(OptContext *ctx, TCGOp *op)
51
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
51
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
52
52
53
static bool fold_or(OptContext *ctx, TCGOp *op)
53
static bool fold_or(OptContext *ctx, TCGOp *op)
54
{
54
{
55
- return fold_const2(ctx, op);
55
- return fold_const2(ctx, op);
56
+ if (fold_const2(ctx, op) ||
56
+ if (fold_const2(ctx, op) ||
57
+ fold_xx_to_x(ctx, op)) {
57
+ fold_xx_to_x(ctx, op)) {
58
+ return true;
58
+ return true;
59
+ }
59
+ }
60
+ return false;
60
+ return false;
61
}
61
}
62
62
63
static bool fold_orc(OptContext *ctx, TCGOp *op)
63
static bool fold_orc(OptContext *ctx, TCGOp *op)
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
65
break;
65
break;
66
}
66
}
67
67
68
- /* Simplify expression for "op r, a, a => mov r, a" cases */
68
- /* Simplify expression for "op r, a, a => mov r, a" cases */
69
- switch (opc) {
69
- switch (opc) {
70
- CASE_OP_32_64_VEC(or):
70
- CASE_OP_32_64_VEC(or):
71
- CASE_OP_32_64_VEC(and):
71
- CASE_OP_32_64_VEC(and):
72
- if (args_are_copies(op->args[1], op->args[2])) {
72
- if (args_are_copies(op->args[1], op->args[2])) {
73
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
73
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
74
- continue;
74
- continue;
75
- }
75
- }
76
- break;
76
- break;
77
- default:
77
- default:
78
- break;
78
- break;
79
- }
79
- }
80
-
80
-
81
/*
81
/*
82
* Process each opcode.
82
* Process each opcode.
83
* Sorted alphabetically by opcode as much as possible.
83
* Sorted alphabetically by opcode as much as possible.
84
--
84
--
85
2.25.1
85
2.25.1
86
86
87
87
1
Pull the "op r, a, 0 => movi r, 0" optimization into a function,
1
Pull the "op r, a, 0 => movi r, 0" optimization into a function,
2
and use it in the outer opcode fold functions.
2
and use it in the outer opcode fold functions.
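
For reference, here the second operand is the absorbing element zero,
shown as a minimal C illustration (the helper name is made up):

    #include <assert.h>
    #include <stdint.h>

    /* A constant zero second operand forces a zero result. */
    static void xi_to_i_examples(uint64_t a)
    {
        assert((a & 0) == 0);   /* and */
        assert((a * 0) == 0);   /* mul, and likewise its high-part variants */
    }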
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 38 ++++++++++++++++++++------------------
8
tcg/optimize.c | 38 ++++++++++++++++++++------------------
9
1 file changed, 20 insertions(+), 18 deletions(-)
9
1 file changed, 20 insertions(+), 18 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
16
return false;
16
return false;
17
}
17
}
18
18
19
+/* If the binary operation has second argument @i, fold to @i. */
19
+/* If the binary operation has second argument @i, fold to @i. */
20
+static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
20
+static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
21
+{
21
+{
22
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
22
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
+ }
24
+ }
25
+ return false;
25
+ return false;
26
+}
26
+}
27
+
27
+
28
/* If the binary operation has both arguments equal, fold to @i. */
28
/* If the binary operation has both arguments equal, fold to @i. */
29
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
29
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
30
{
30
{
31
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
31
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
32
static bool fold_and(OptContext *ctx, TCGOp *op)
32
static bool fold_and(OptContext *ctx, TCGOp *op)
33
{
33
{
34
if (fold_const2(ctx, op) ||
34
if (fold_const2(ctx, op) ||
35
+ fold_xi_to_i(ctx, op, 0) ||
35
+ fold_xi_to_i(ctx, op, 0) ||
36
fold_xx_to_x(ctx, op)) {
36
fold_xx_to_x(ctx, op)) {
37
return true;
37
return true;
38
}
38
}
39
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
39
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
40
40
41
static bool fold_mul(OptContext *ctx, TCGOp *op)
41
static bool fold_mul(OptContext *ctx, TCGOp *op)
42
{
42
{
43
- return fold_const2(ctx, op);
43
- return fold_const2(ctx, op);
44
+ if (fold_const2(ctx, op) ||
44
+ if (fold_const2(ctx, op) ||
45
+ fold_xi_to_i(ctx, op, 0)) {
45
+ fold_xi_to_i(ctx, op, 0)) {
46
+ return true;
46
+ return true;
47
+ }
47
+ }
48
+ return false;
48
+ return false;
49
}
49
}
50
50
51
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
51
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
52
{
52
{
53
- return fold_const2(ctx, op);
53
- return fold_const2(ctx, op);
54
+ if (fold_const2(ctx, op) ||
54
+ if (fold_const2(ctx, op) ||
55
+ fold_xi_to_i(ctx, op, 0)) {
55
+ fold_xi_to_i(ctx, op, 0)) {
56
+ return true;
56
+ return true;
57
+ }
57
+ }
58
+ return false;
58
+ return false;
59
}
59
}
60
60
61
static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
61
static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
63
continue;
63
continue;
64
}
64
}
65
65
66
- /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
66
- /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
67
- switch (opc) {
67
- switch (opc) {
68
- CASE_OP_32_64_VEC(and):
68
- CASE_OP_32_64_VEC(and):
69
- CASE_OP_32_64_VEC(mul):
69
- CASE_OP_32_64_VEC(mul):
70
- CASE_OP_32_64(muluh):
70
- CASE_OP_32_64(muluh):
71
- CASE_OP_32_64(mulsh):
71
- CASE_OP_32_64(mulsh):
72
- if (arg_is_const(op->args[2])
72
- if (arg_is_const(op->args[2])
73
- && arg_info(op->args[2])->val == 0) {
73
- && arg_info(op->args[2])->val == 0) {
74
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
74
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
75
- continue;
75
- continue;
76
- }
76
- }
77
- break;
77
- break;
78
- default:
78
- default:
79
- break;
79
- break;
80
- }
80
- }
81
-
81
-
82
/*
82
/*
83
* Process each opcode.
83
* Process each opcode.
84
* Sorted alphabetically by opcode as much as possible.
84
* Sorted alphabetically by opcode as much as possible.
85
--
85
--
86
2.25.1
86
2.25.1
87
87
88
88
1
Compute the type of the operation early.
1
Compute the type of the operation early.
2
2
3
There are at least 4 places that used a def->flags ladder
3
There are at least 4 places that used a def->flags ladder
4
to determine the type of the operation being optimized.
4
to determine the type of the operation being optimized.
5
5
6
There were two places that assumed !TCG_OPF_64BIT means
6
There were two places that assumed !TCG_OPF_64BIT means
7
TCG_TYPE_I32, and so could potentially compute incorrect
7
TCG_TYPE_I32, and so could potentially compute incorrect
8
results for vector operations.
8
results for vector operations.
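
Concretely, the type is now computed once per opcode, roughly along the
lines of the sketch below (which mirrors the flags ladders being
removed; the exact placement within tcg_optimize() may differ):

    const TCGOpDef *def = &tcg_op_defs[op->opc];

    if (def->flags & TCG_OPF_VECTOR) {
        ctx.type = TCGOP_VECL(op) + TCG_TYPE_V64;
    } else if (def->flags & TCG_OPF_64BIT) {
        ctx.type = TCG_TYPE_I64;
    } else {
        ctx.type = TCG_TYPE_I32;
    }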
9
9
10
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
10
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
12
---
13
tcg/optimize.c | 149 +++++++++++++++++++++++++++++--------------------
13
tcg/optimize.c | 149 +++++++++++++++++++++++++++++--------------------
14
1 file changed, 89 insertions(+), 60 deletions(-)
14
1 file changed, 89 insertions(+), 60 deletions(-)
15
15
16
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
diff --git a/tcg/optimize.c b/tcg/optimize.c
17
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/optimize.c
18
--- a/tcg/optimize.c
19
+++ b/tcg/optimize.c
19
+++ b/tcg/optimize.c
20
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
20
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
21
21
22
/* In flight values from optimization. */
22
/* In flight values from optimization. */
23
uint64_t z_mask;
23
uint64_t z_mask;
24
+ TCGType type;
24
+ TCGType type;
25
} OptContext;
25
} OptContext;
26
26
27
static inline TempOptInfo *ts_info(TCGTemp *ts)
27
static inline TempOptInfo *ts_info(TCGTemp *ts)
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
29
{
29
{
30
TCGTemp *dst_ts = arg_temp(dst);
30
TCGTemp *dst_ts = arg_temp(dst);
31
TCGTemp *src_ts = arg_temp(src);
31
TCGTemp *src_ts = arg_temp(src);
32
- const TCGOpDef *def;
32
- const TCGOpDef *def;
33
TempOptInfo *di;
33
TempOptInfo *di;
34
TempOptInfo *si;
34
TempOptInfo *si;
35
uint64_t z_mask;
35
uint64_t z_mask;
36
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
36
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
37
reset_ts(dst_ts);
37
reset_ts(dst_ts);
38
di = ts_info(dst_ts);
38
di = ts_info(dst_ts);
39
si = ts_info(src_ts);
39
si = ts_info(src_ts);
40
- def = &tcg_op_defs[op->opc];
40
- def = &tcg_op_defs[op->opc];
41
- if (def->flags & TCG_OPF_VECTOR) {
41
- if (def->flags & TCG_OPF_VECTOR) {
42
- new_op = INDEX_op_mov_vec;
42
- new_op = INDEX_op_mov_vec;
43
- } else if (def->flags & TCG_OPF_64BIT) {
43
- } else if (def->flags & TCG_OPF_64BIT) {
44
- new_op = INDEX_op_mov_i64;
44
- new_op = INDEX_op_mov_i64;
45
- } else {
45
- } else {
46
+
46
+
47
+ switch (ctx->type) {
47
+ switch (ctx->type) {
48
+ case TCG_TYPE_I32:
48
+ case TCG_TYPE_I32:
49
new_op = INDEX_op_mov_i32;
49
new_op = INDEX_op_mov_i32;
50
+ break;
50
+ break;
51
+ case TCG_TYPE_I64:
51
+ case TCG_TYPE_I64:
52
+ new_op = INDEX_op_mov_i64;
52
+ new_op = INDEX_op_mov_i64;
53
+ break;
53
+ break;
54
+ case TCG_TYPE_V64:
54
+ case TCG_TYPE_V64:
55
+ case TCG_TYPE_V128:
55
+ case TCG_TYPE_V128:
56
+ case TCG_TYPE_V256:
56
+ case TCG_TYPE_V256:
57
+ /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
57
+ /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
58
+ new_op = INDEX_op_mov_vec;
58
+ new_op = INDEX_op_mov_vec;
59
+ break;
59
+ break;
60
+ default:
60
+ default:
61
+ g_assert_not_reached();
61
+ g_assert_not_reached();
62
}
62
}
63
op->opc = new_op;
63
op->opc = new_op;
64
- /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
64
- /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
65
op->args[0] = dst;
65
op->args[0] = dst;
66
op->args[1] = src;
66
op->args[1] = src;
67
67
68
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
68
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
69
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
69
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
70
TCGArg dst, uint64_t val)
70
TCGArg dst, uint64_t val)
71
{
71
{
72
- const TCGOpDef *def = &tcg_op_defs[op->opc];
72
- const TCGOpDef *def = &tcg_op_defs[op->opc];
73
- TCGType type;
73
- TCGType type;
74
- TCGTemp *tv;
74
- TCGTemp *tv;
75
-
75
-
76
- if (def->flags & TCG_OPF_VECTOR) {
76
- if (def->flags & TCG_OPF_VECTOR) {
77
- type = TCGOP_VECL(op) + TCG_TYPE_V64;
77
- type = TCGOP_VECL(op) + TCG_TYPE_V64;
78
- } else if (def->flags & TCG_OPF_64BIT) {
78
- } else if (def->flags & TCG_OPF_64BIT) {
79
- type = TCG_TYPE_I64;
79
- type = TCG_TYPE_I64;
80
- } else {
80
- } else {
81
- type = TCG_TYPE_I32;
81
- type = TCG_TYPE_I32;
82
- }
82
- }
83
-
83
-
84
/* Convert movi to mov with constant temp. */
84
/* Convert movi to mov with constant temp. */
85
- tv = tcg_constant_internal(type, val);
85
- tv = tcg_constant_internal(type, val);
86
+ TCGTemp *tv = tcg_constant_internal(ctx->type, val);
86
+ TCGTemp *tv = tcg_constant_internal(ctx->type, val);
87
+
87
+
88
init_ts_info(ctx, tv);
88
init_ts_info(ctx, tv);
89
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
89
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
90
}
90
}
91
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
91
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
92
}
92
}
93
}
93
}
94
94
95
-static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
95
-static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
96
+static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
96
+static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
97
+ uint64_t x, uint64_t y)
97
+ uint64_t x, uint64_t y)
98
{
98
{
99
- const TCGOpDef *def = &tcg_op_defs[op];
99
- const TCGOpDef *def = &tcg_op_defs[op];
100
uint64_t res = do_constant_folding_2(op, x, y);
100
uint64_t res = do_constant_folding_2(op, x, y);
101
- if (!(def->flags & TCG_OPF_64BIT)) {
101
- if (!(def->flags & TCG_OPF_64BIT)) {
102
+ if (type == TCG_TYPE_I32) {
102
+ if (type == TCG_TYPE_I32) {
103
res = (int32_t)res;
103
res = (int32_t)res;
104
}
104
}
105
return res;
105
return res;
106
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
106
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
107
* Return -1 if the condition can't be simplified,
107
* Return -1 if the condition can't be simplified,
108
* and the result of the condition (0 or 1) if it can.
108
* and the result of the condition (0 or 1) if it can.
109
*/
109
*/
110
-static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
110
-static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
111
+static int do_constant_folding_cond(TCGType type, TCGArg x,
111
+static int do_constant_folding_cond(TCGType type, TCGArg x,
112
TCGArg y, TCGCond c)
112
TCGArg y, TCGCond c)
113
{
113
{
114
uint64_t xv = arg_info(x)->val;
114
uint64_t xv = arg_info(x)->val;
115
uint64_t yv = arg_info(y)->val;
115
uint64_t yv = arg_info(y)->val;
116
116
117
if (arg_is_const(x) && arg_is_const(y)) {
117
if (arg_is_const(x) && arg_is_const(y)) {
118
- const TCGOpDef *def = &tcg_op_defs[op];
118
- const TCGOpDef *def = &tcg_op_defs[op];
119
- tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
119
- tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
120
- if (def->flags & TCG_OPF_64BIT) {
120
- if (def->flags & TCG_OPF_64BIT) {
121
- return do_constant_folding_cond_64(xv, yv, c);
121
- return do_constant_folding_cond_64(xv, yv, c);
122
- } else {
122
- } else {
123
+ switch (type) {
123
+ switch (type) {
124
+ case TCG_TYPE_I32:
124
+ case TCG_TYPE_I32:
125
return do_constant_folding_cond_32(xv, yv, c);
125
return do_constant_folding_cond_32(xv, yv, c);
126
+ case TCG_TYPE_I64:
126
+ case TCG_TYPE_I64:
127
+ return do_constant_folding_cond_64(xv, yv, c);
127
+ return do_constant_folding_cond_64(xv, yv, c);
128
+ default:
128
+ default:
129
+ /* Only scalar comparisons are optimizable */
129
+ /* Only scalar comparisons are optimizable */
130
+ return -1;
130
+ return -1;
131
}
131
}
132
} else if (args_are_copies(x, y)) {
132
} else if (args_are_copies(x, y)) {
133
return do_constant_folding_cond_eq(c);
133
return do_constant_folding_cond_eq(c);
134
@@ -XXX,XX +XXX,XX @@ static bool fold_const1(OptContext *ctx, TCGOp *op)
134
@@ -XXX,XX +XXX,XX @@ static bool fold_const1(OptContext *ctx, TCGOp *op)
135
uint64_t t;
135
uint64_t t;
136
136
137
t = arg_info(op->args[1])->val;
137
t = arg_info(op->args[1])->val;
138
- t = do_constant_folding(op->opc, t, 0);
138
- t = do_constant_folding(op->opc, t, 0);
139
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
139
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
140
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
140
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
141
}
141
}
142
return false;
142
return false;
143
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
143
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
144
uint64_t t1 = arg_info(op->args[1])->val;
144
uint64_t t1 = arg_info(op->args[1])->val;
145
uint64_t t2 = arg_info(op->args[2])->val;
145
uint64_t t2 = arg_info(op->args[2])->val;
146
146
147
- t1 = do_constant_folding(op->opc, t1, t2);
147
- t1 = do_constant_folding(op->opc, t1, t2);
148
+ t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
148
+ t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
149
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
149
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
150
}
150
}
151
return false;
151
return false;
152
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
152
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
153
static bool fold_brcond(OptContext *ctx, TCGOp *op)
153
static bool fold_brcond(OptContext *ctx, TCGOp *op)
154
{
154
{
155
TCGCond cond = op->args[2];
155
TCGCond cond = op->args[2];
156
- int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
156
- int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
157
+ int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
157
+ int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
158
158
159
if (i == 0) {
159
if (i == 0) {
160
tcg_op_remove(ctx->tcg, op);
160
tcg_op_remove(ctx->tcg, op);
161
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
161
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
162
* Simplify EQ/NE comparisons where one of the pairs
162
* Simplify EQ/NE comparisons where one of the pairs
163
* can be simplified.
163
* can be simplified.
164
*/
164
*/
165
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
165
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
166
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
166
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
167
op->args[2], cond);
167
op->args[2], cond);
168
switch (i ^ inv) {
168
switch (i ^ inv) {
169
case 0:
169
case 0:
170
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
170
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
171
goto do_brcond_high;
171
goto do_brcond_high;
172
}
172
}
173
173
174
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
174
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
175
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
175
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
176
op->args[3], cond);
176
op->args[3], cond);
177
switch (i ^ inv) {
177
switch (i ^ inv) {
178
case 0:
178
case 0:
179
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
179
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
180
if (arg_is_const(op->args[1])) {
180
if (arg_is_const(op->args[1])) {
181
uint64_t t = arg_info(op->args[1])->val;
181
uint64_t t = arg_info(op->args[1])->val;
182
182
183
- t = do_constant_folding(op->opc, t, op->args[2]);
183
- t = do_constant_folding(op->opc, t, op->args[2]);
184
+ t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
184
+ t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
185
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
185
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
186
}
186
}
187
return false;
187
return false;
188
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
188
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
189
uint64_t t = arg_info(op->args[1])->val;
189
uint64_t t = arg_info(op->args[1])->val;
190
190
191
if (t != 0) {
191
if (t != 0) {
192
- t = do_constant_folding(op->opc, t, 0);
192
- t = do_constant_folding(op->opc, t, 0);
193
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
193
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
194
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
194
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
195
}
195
}
196
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
196
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
197
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
197
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
198
198
199
static bool fold_movcond(OptContext *ctx, TCGOp *op)
199
static bool fold_movcond(OptContext *ctx, TCGOp *op)
200
{
200
{
201
- TCGOpcode opc = op->opc;
201
- TCGOpcode opc = op->opc;
202
TCGCond cond = op->args[5];
202
TCGCond cond = op->args[5];
203
- int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
203
- int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
204
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
204
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
205
205
206
if (i >= 0) {
206
if (i >= 0) {
207
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
207
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
208
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
208
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
209
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
209
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
210
uint64_t tv = arg_info(op->args[3])->val;
210
uint64_t tv = arg_info(op->args[3])->val;
211
uint64_t fv = arg_info(op->args[4])->val;
211
uint64_t fv = arg_info(op->args[4])->val;
212
+ TCGOpcode opc;
212
+ TCGOpcode opc;
213
213
214
- opc = (opc == INDEX_op_movcond_i32
214
- opc = (opc == INDEX_op_movcond_i32
215
- ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
215
- ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
216
+ switch (ctx->type) {
216
+ switch (ctx->type) {
217
+ case TCG_TYPE_I32:
217
+ case TCG_TYPE_I32:
218
+ opc = INDEX_op_setcond_i32;
218
+ opc = INDEX_op_setcond_i32;
219
+ break;
219
+ break;
220
+ case TCG_TYPE_I64:
220
+ case TCG_TYPE_I64:
221
+ opc = INDEX_op_setcond_i64;
221
+ opc = INDEX_op_setcond_i64;
222
+ break;
222
+ break;
223
+ default:
223
+ default:
224
+ g_assert_not_reached();
224
+ g_assert_not_reached();
225
+ }
225
+ }
226
226
227
if (tv == 1 && fv == 0) {
227
if (tv == 1 && fv == 0) {
228
op->opc = opc;
228
op->opc = opc;
229
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
229
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
230
static bool fold_setcond(OptContext *ctx, TCGOp *op)
230
static bool fold_setcond(OptContext *ctx, TCGOp *op)
231
{
231
{
232
TCGCond cond = op->args[3];
232
TCGCond cond = op->args[3];
233
- int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
233
- int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
234
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
234
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
235
235
236
if (i >= 0) {
236
if (i >= 0) {
237
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
237
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
238
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
238
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
239
* Simplify EQ/NE comparisons where one of the pairs
239
* Simplify EQ/NE comparisons where one of the pairs
240
* can be simplified.
240
* can be simplified.
241
*/
241
*/
242
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
242
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
243
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
243
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
244
op->args[3], cond);
244
op->args[3], cond);
245
switch (i ^ inv) {
245
switch (i ^ inv) {
246
case 0:
246
case 0:
247
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
247
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
248
goto do_setcond_high;
248
goto do_setcond_high;
249
}
249
}
250
250
251
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
251
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
252
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
252
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
253
op->args[4], cond);
253
op->args[4], cond);
254
switch (i ^ inv) {
254
switch (i ^ inv) {
255
case 0:
255
case 0:
256
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
256
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
257
init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
257
init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
258
copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
258
copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
259
259
260
+ /* Pre-compute the type of the operation. */
260
+ /* Pre-compute the type of the operation. */
261
+ if (def->flags & TCG_OPF_VECTOR) {
261
+ if (def->flags & TCG_OPF_VECTOR) {
262
+ ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
262
+ ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
263
+ } else if (def->flags & TCG_OPF_64BIT) {
263
+ } else if (def->flags & TCG_OPF_64BIT) {
264
+ ctx.type = TCG_TYPE_I64;
264
+ ctx.type = TCG_TYPE_I64;
265
+ } else {
265
+ } else {
266
+ ctx.type = TCG_TYPE_I32;
266
+ ctx.type = TCG_TYPE_I32;
267
+ }
267
+ }
268
+
268
+
269
/* For commutative operations make constant second argument */
269
/* For commutative operations make constant second argument */
270
switch (opc) {
270
switch (opc) {
271
CASE_OP_32_64_VEC(add):
271
CASE_OP_32_64_VEC(add):
272
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
272
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
273
/* Proceed with possible constant folding. */
273
/* Proceed with possible constant folding. */
274
break;
274
break;
275
}
275
}
276
- if (opc == INDEX_op_sub_i32) {
276
- if (opc == INDEX_op_sub_i32) {
277
+ switch (ctx.type) {
277
+ switch (ctx.type) {
278
+ case TCG_TYPE_I32:
278
+ case TCG_TYPE_I32:
279
neg_op = INDEX_op_neg_i32;
279
neg_op = INDEX_op_neg_i32;
280
have_neg = TCG_TARGET_HAS_neg_i32;
280
have_neg = TCG_TARGET_HAS_neg_i32;
281
- } else if (opc == INDEX_op_sub_i64) {
281
- } else if (opc == INDEX_op_sub_i64) {
282
+ break;
282
+ break;
283
+ case TCG_TYPE_I64:
283
+ case TCG_TYPE_I64:
284
neg_op = INDEX_op_neg_i64;
284
neg_op = INDEX_op_neg_i64;
285
have_neg = TCG_TARGET_HAS_neg_i64;
285
have_neg = TCG_TARGET_HAS_neg_i64;
286
- } else if (TCG_TARGET_HAS_neg_vec) {
286
- } else if (TCG_TARGET_HAS_neg_vec) {
287
- TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
287
- TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
288
- unsigned vece = TCGOP_VECE(op);
288
- unsigned vece = TCGOP_VECE(op);
289
- neg_op = INDEX_op_neg_vec;
289
- neg_op = INDEX_op_neg_vec;
290
- have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
290
- have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
291
- } else {
291
- } else {
292
break;
292
break;
293
+ case TCG_TYPE_V64:
293
+ case TCG_TYPE_V64:
294
+ case TCG_TYPE_V128:
294
+ case TCG_TYPE_V128:
295
+ case TCG_TYPE_V256:
295
+ case TCG_TYPE_V256:
296
+ neg_op = INDEX_op_neg_vec;
296
+ neg_op = INDEX_op_neg_vec;
297
+ have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
297
+ have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
298
+ TCGOP_VECE(op)) > 0;
298
+ TCGOP_VECE(op)) > 0;
299
+ break;
299
+ break;
300
+ default:
300
+ default:
301
+ g_assert_not_reached();
301
+ g_assert_not_reached();
302
}
302
}
303
if (!have_neg) {
303
if (!have_neg) {
304
break;
304
break;
305
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
305
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
306
TCGOpcode not_op;
306
TCGOpcode not_op;
307
bool have_not;
307
bool have_not;
308
308
309
- if (def->flags & TCG_OPF_VECTOR) {
309
- if (def->flags & TCG_OPF_VECTOR) {
310
- not_op = INDEX_op_not_vec;
310
- not_op = INDEX_op_not_vec;
311
- have_not = TCG_TARGET_HAS_not_vec;
311
- have_not = TCG_TARGET_HAS_not_vec;
312
- } else if (def->flags & TCG_OPF_64BIT) {
312
- } else if (def->flags & TCG_OPF_64BIT) {
313
- not_op = INDEX_op_not_i64;
313
- not_op = INDEX_op_not_i64;
314
- have_not = TCG_TARGET_HAS_not_i64;
314
- have_not = TCG_TARGET_HAS_not_i64;
315
- } else {
315
- } else {
316
+ switch (ctx.type) {
316
+ switch (ctx.type) {
317
+ case TCG_TYPE_I32:
317
+ case TCG_TYPE_I32:
318
not_op = INDEX_op_not_i32;
318
not_op = INDEX_op_not_i32;
319
have_not = TCG_TARGET_HAS_not_i32;
319
have_not = TCG_TARGET_HAS_not_i32;
320
+ break;
320
+ break;
321
+ case TCG_TYPE_I64:
321
+ case TCG_TYPE_I64:
322
+ not_op = INDEX_op_not_i64;
322
+ not_op = INDEX_op_not_i64;
323
+ have_not = TCG_TARGET_HAS_not_i64;
323
+ have_not = TCG_TARGET_HAS_not_i64;
324
+ break;
324
+ break;
325
+ case TCG_TYPE_V64:
325
+ case TCG_TYPE_V64:
326
+ case TCG_TYPE_V128:
326
+ case TCG_TYPE_V128:
327
+ case TCG_TYPE_V256:
327
+ case TCG_TYPE_V256:
328
+ not_op = INDEX_op_not_vec;
328
+ not_op = INDEX_op_not_vec;
329
+ have_not = TCG_TARGET_HAS_not_vec;
329
+ have_not = TCG_TARGET_HAS_not_vec;
330
+ break;
330
+ break;
331
+ default:
331
+ default:
332
+ g_assert_not_reached();
332
+ g_assert_not_reached();
333
}
333
}
334
if (!have_not) {
334
if (!have_not) {
335
break;
335
break;
336
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
336
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
337
below, we can ignore high bits, but for further optimizations we
337
below, we can ignore high bits, but for further optimizations we
338
need to record that the high bits contain garbage. */
338
need to record that the high bits contain garbage. */
339
partmask = z_mask;
339
partmask = z_mask;
340
- if (!(def->flags & TCG_OPF_64BIT)) {
340
- if (!(def->flags & TCG_OPF_64BIT)) {
341
+ if (ctx.type == TCG_TYPE_I32) {
341
+ if (ctx.type == TCG_TYPE_I32) {
342
z_mask |= ~(tcg_target_ulong)0xffffffffu;
342
z_mask |= ~(tcg_target_ulong)0xffffffffu;
343
partmask &= 0xffffffffu;
343
partmask &= 0xffffffffu;
344
affected &= 0xffffffffu;
344
affected &= 0xffffffffu;
345
--
345
--
346
2.25.1
346
2.25.1
347
347
348
348
1
Split out the conditional conversion from a more complex logical
1
Split out the conditional conversion from a more complex logical
2
operation to a simple NOT. Create a couple more helpers to make
2
operation to a simple NOT. Create a couple more helpers to make
3
this easy for the outer-most logical operations.
3
this easy for the outer-most logical operations.
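For readers following along, the conversions introduced here rest on simple bitwise identities. The standalone C check below is only an illustration of those identities; it is not part of the patch, and everything in it is made up for the example:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t a = 0x1234abcd5678ef00u;

    assert((a ^ ~UINT64_C(0)) == ~a);    /* xor  a, -1 -> not a */
    assert(~(a & ~UINT64_C(0)) == ~a);   /* nand a, -1 -> not a */
    assert(~(a | 0) == ~a);              /* nor  a, 0  -> not a */
    assert(~(a ^ 0) == ~a);              /* eqv  a, 0  -> not a */
    assert((~UINT64_C(0) & ~a) == ~a);   /* andc -1, a -> not a */
    assert((UINT64_C(0) | ~a) == ~a);    /* orc  0, a  -> not a */

    printf("NOT-folding identities hold\n");
    return 0;
}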
4
4
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 158 +++++++++++++++++++++++++++----------------------
8
tcg/optimize.c | 158 +++++++++++++++++++++++++++----------------------
9
1 file changed, 86 insertions(+), 72 deletions(-)
9
1 file changed, 86 insertions(+), 72 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
16
return false;
16
return false;
17
}
17
}
18
18
19
+/*
19
+/*
20
+ * Convert @op to NOT, if NOT is supported by the host.
20
+ * Convert @op to NOT, if NOT is supported by the host.
21
+ * Return true if the conversion is successful, which will still
21
+ * Return true if the conversion is successful, which will still
22
+ * indicate that the processing is complete.
22
+ * indicate that the processing is complete.
23
+ */
23
+ */
24
+static bool fold_not(OptContext *ctx, TCGOp *op);
24
+static bool fold_not(OptContext *ctx, TCGOp *op);
25
+static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
25
+static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
26
+{
26
+{
27
+ TCGOpcode not_op;
27
+ TCGOpcode not_op;
28
+ bool have_not;
28
+ bool have_not;
29
+
29
+
30
+ switch (ctx->type) {
30
+ switch (ctx->type) {
31
+ case TCG_TYPE_I32:
31
+ case TCG_TYPE_I32:
32
+ not_op = INDEX_op_not_i32;
32
+ not_op = INDEX_op_not_i32;
33
+ have_not = TCG_TARGET_HAS_not_i32;
33
+ have_not = TCG_TARGET_HAS_not_i32;
34
+ break;
34
+ break;
35
+ case TCG_TYPE_I64:
35
+ case TCG_TYPE_I64:
36
+ not_op = INDEX_op_not_i64;
36
+ not_op = INDEX_op_not_i64;
37
+ have_not = TCG_TARGET_HAS_not_i64;
37
+ have_not = TCG_TARGET_HAS_not_i64;
38
+ break;
38
+ break;
39
+ case TCG_TYPE_V64:
39
+ case TCG_TYPE_V64:
40
+ case TCG_TYPE_V128:
40
+ case TCG_TYPE_V128:
41
+ case TCG_TYPE_V256:
41
+ case TCG_TYPE_V256:
42
+ not_op = INDEX_op_not_vec;
42
+ not_op = INDEX_op_not_vec;
43
+ have_not = TCG_TARGET_HAS_not_vec;
43
+ have_not = TCG_TARGET_HAS_not_vec;
44
+ break;
44
+ break;
45
+ default:
45
+ default:
46
+ g_assert_not_reached();
46
+ g_assert_not_reached();
47
+ }
47
+ }
48
+ if (have_not) {
48
+ if (have_not) {
49
+ op->opc = not_op;
49
+ op->opc = not_op;
50
+ op->args[1] = op->args[idx];
50
+ op->args[1] = op->args[idx];
51
+ return fold_not(ctx, op);
51
+ return fold_not(ctx, op);
52
+ }
52
+ }
53
+ return false;
53
+ return false;
54
+}
54
+}
55
+
55
+
56
+/* If the binary operation has first argument @i, fold to NOT. */
56
+/* If the binary operation has first argument @i, fold to NOT. */
57
+static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
57
+static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
58
+{
58
+{
59
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
59
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
60
+ return fold_to_not(ctx, op, 2);
60
+ return fold_to_not(ctx, op, 2);
61
+ }
61
+ }
62
+ return false;
62
+ return false;
63
+}
63
+}
64
+
64
+
65
/* If the binary operation has second argument @i, fold to @i. */
65
/* If the binary operation has second argument @i, fold to @i. */
66
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
66
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
67
{
67
{
68
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
68
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
69
return false;
69
return false;
70
}
70
}
71
71
72
+/* If the binary operation has second argument @i, fold to NOT. */
72
+/* If the binary operation has second argument @i, fold to NOT. */
73
+static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
73
+static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
74
+{
74
+{
75
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
75
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
76
+ return fold_to_not(ctx, op, 1);
76
+ return fold_to_not(ctx, op, 1);
77
+ }
77
+ }
78
+ return false;
78
+ return false;
79
+}
79
+}
80
+
80
+
81
/* If the binary operation has both arguments equal, fold to @i. */
81
/* If the binary operation has both arguments equal, fold to @i. */
82
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
82
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
83
{
83
{
84
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
84
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
85
static bool fold_andc(OptContext *ctx, TCGOp *op)
85
static bool fold_andc(OptContext *ctx, TCGOp *op)
86
{
86
{
87
if (fold_const2(ctx, op) ||
87
if (fold_const2(ctx, op) ||
88
- fold_xx_to_i(ctx, op, 0)) {
88
- fold_xx_to_i(ctx, op, 0)) {
89
+ fold_xx_to_i(ctx, op, 0) ||
89
+ fold_xx_to_i(ctx, op, 0) ||
90
+ fold_ix_to_not(ctx, op, -1)) {
90
+ fold_ix_to_not(ctx, op, -1)) {
91
return true;
91
return true;
92
}
92
}
93
return false;
93
return false;
94
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
94
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
95
95
96
static bool fold_eqv(OptContext *ctx, TCGOp *op)
96
static bool fold_eqv(OptContext *ctx, TCGOp *op)
97
{
97
{
98
- return fold_const2(ctx, op);
98
- return fold_const2(ctx, op);
99
+ if (fold_const2(ctx, op) ||
99
+ if (fold_const2(ctx, op) ||
100
+ fold_xi_to_not(ctx, op, 0)) {
100
+ fold_xi_to_not(ctx, op, 0)) {
101
+ return true;
101
+ return true;
102
+ }
102
+ }
103
+ return false;
103
+ return false;
104
}
104
}
105
105
106
static bool fold_extract(OptContext *ctx, TCGOp *op)
106
static bool fold_extract(OptContext *ctx, TCGOp *op)
107
@@ -XXX,XX +XXX,XX @@ static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
107
@@ -XXX,XX +XXX,XX @@ static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
108
108
109
static bool fold_nand(OptContext *ctx, TCGOp *op)
109
static bool fold_nand(OptContext *ctx, TCGOp *op)
110
{
110
{
111
- return fold_const2(ctx, op);
111
- return fold_const2(ctx, op);
112
+ if (fold_const2(ctx, op) ||
112
+ if (fold_const2(ctx, op) ||
113
+ fold_xi_to_not(ctx, op, -1)) {
113
+ fold_xi_to_not(ctx, op, -1)) {
114
+ return true;
114
+ return true;
115
+ }
115
+ }
116
+ return false;
116
+ return false;
117
}
117
}
118
118
119
static bool fold_neg(OptContext *ctx, TCGOp *op)
119
static bool fold_neg(OptContext *ctx, TCGOp *op)
120
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
120
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
121
121
122
static bool fold_nor(OptContext *ctx, TCGOp *op)
122
static bool fold_nor(OptContext *ctx, TCGOp *op)
123
{
123
{
124
- return fold_const2(ctx, op);
124
- return fold_const2(ctx, op);
125
+ if (fold_const2(ctx, op) ||
125
+ if (fold_const2(ctx, op) ||
126
+ fold_xi_to_not(ctx, op, 0)) {
126
+ fold_xi_to_not(ctx, op, 0)) {
127
+ return true;
127
+ return true;
128
+ }
128
+ }
129
+ return false;
129
+ return false;
130
}
130
}
131
131
132
static bool fold_not(OptContext *ctx, TCGOp *op)
132
static bool fold_not(OptContext *ctx, TCGOp *op)
133
{
133
{
134
- return fold_const1(ctx, op);
134
- return fold_const1(ctx, op);
135
+ if (fold_const1(ctx, op)) {
135
+ if (fold_const1(ctx, op)) {
136
+ return true;
136
+ return true;
137
+ }
137
+ }
138
+
138
+
139
+ /* Because of fold_to_not, we want to always return true, via finish. */
139
+ /* Because of fold_to_not, we want to always return true, via finish. */
140
+ finish_folding(ctx, op);
140
+ finish_folding(ctx, op);
141
+ return true;
141
+ return true;
142
}
142
}
143
143
144
static bool fold_or(OptContext *ctx, TCGOp *op)
144
static bool fold_or(OptContext *ctx, TCGOp *op)
145
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
145
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
146
146
147
static bool fold_orc(OptContext *ctx, TCGOp *op)
147
static bool fold_orc(OptContext *ctx, TCGOp *op)
148
{
148
{
149
- return fold_const2(ctx, op);
149
- return fold_const2(ctx, op);
150
+ if (fold_const2(ctx, op) ||
150
+ if (fold_const2(ctx, op) ||
151
+ fold_ix_to_not(ctx, op, 0)) {
151
+ fold_ix_to_not(ctx, op, 0)) {
152
+ return true;
152
+ return true;
153
+ }
153
+ }
154
+ return false;
154
+ return false;
155
}
155
}
156
156
157
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
157
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
158
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
158
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
159
static bool fold_xor(OptContext *ctx, TCGOp *op)
159
static bool fold_xor(OptContext *ctx, TCGOp *op)
160
{
160
{
161
if (fold_const2(ctx, op) ||
161
if (fold_const2(ctx, op) ||
162
- fold_xx_to_i(ctx, op, 0)) {
162
- fold_xx_to_i(ctx, op, 0)) {
163
+ fold_xx_to_i(ctx, op, 0) ||
163
+ fold_xx_to_i(ctx, op, 0) ||
164
+ fold_xi_to_not(ctx, op, -1)) {
164
+ fold_xi_to_not(ctx, op, -1)) {
165
return true;
165
return true;
166
}
166
}
167
return false;
167
return false;
168
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
168
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
169
}
169
}
170
}
170
}
171
break;
171
break;
172
- CASE_OP_32_64_VEC(xor):
172
- CASE_OP_32_64_VEC(xor):
173
- CASE_OP_32_64(nand):
173
- CASE_OP_32_64(nand):
174
- if (!arg_is_const(op->args[1])
174
- if (!arg_is_const(op->args[1])
175
- && arg_is_const(op->args[2])
175
- && arg_is_const(op->args[2])
176
- && arg_info(op->args[2])->val == -1) {
176
- && arg_info(op->args[2])->val == -1) {
177
- i = 1;
177
- i = 1;
178
- goto try_not;
178
- goto try_not;
179
- }
179
- }
180
- break;
180
- break;
181
- CASE_OP_32_64(nor):
181
- CASE_OP_32_64(nor):
182
- if (!arg_is_const(op->args[1])
182
- if (!arg_is_const(op->args[1])
183
- && arg_is_const(op->args[2])
183
- && arg_is_const(op->args[2])
184
- && arg_info(op->args[2])->val == 0) {
184
- && arg_info(op->args[2])->val == 0) {
185
- i = 1;
185
- i = 1;
186
- goto try_not;
186
- goto try_not;
187
- }
187
- }
188
- break;
188
- break;
189
- CASE_OP_32_64_VEC(andc):
189
- CASE_OP_32_64_VEC(andc):
190
- if (!arg_is_const(op->args[2])
190
- if (!arg_is_const(op->args[2])
191
- && arg_is_const(op->args[1])
191
- && arg_is_const(op->args[1])
192
- && arg_info(op->args[1])->val == -1) {
192
- && arg_info(op->args[1])->val == -1) {
193
- i = 2;
193
- i = 2;
194
- goto try_not;
194
- goto try_not;
195
- }
195
- }
196
- break;
196
- break;
197
- CASE_OP_32_64_VEC(orc):
197
- CASE_OP_32_64_VEC(orc):
198
- CASE_OP_32_64(eqv):
198
- CASE_OP_32_64(eqv):
199
- if (!arg_is_const(op->args[2])
199
- if (!arg_is_const(op->args[2])
200
- && arg_is_const(op->args[1])
200
- && arg_is_const(op->args[1])
201
- && arg_info(op->args[1])->val == 0) {
201
- && arg_info(op->args[1])->val == 0) {
202
- i = 2;
202
- i = 2;
203
- goto try_not;
203
- goto try_not;
204
- }
204
- }
205
- break;
205
- break;
206
- try_not:
206
- try_not:
207
- {
207
- {
208
- TCGOpcode not_op;
208
- TCGOpcode not_op;
209
- bool have_not;
209
- bool have_not;
210
-
210
-
211
- switch (ctx.type) {
211
- switch (ctx.type) {
212
- case TCG_TYPE_I32:
212
- case TCG_TYPE_I32:
213
- not_op = INDEX_op_not_i32;
213
- not_op = INDEX_op_not_i32;
214
- have_not = TCG_TARGET_HAS_not_i32;
214
- have_not = TCG_TARGET_HAS_not_i32;
215
- break;
215
- break;
216
- case TCG_TYPE_I64:
216
- case TCG_TYPE_I64:
217
- not_op = INDEX_op_not_i64;
217
- not_op = INDEX_op_not_i64;
218
- have_not = TCG_TARGET_HAS_not_i64;
218
- have_not = TCG_TARGET_HAS_not_i64;
219
- break;
219
- break;
220
- case TCG_TYPE_V64:
220
- case TCG_TYPE_V64:
221
- case TCG_TYPE_V128:
221
- case TCG_TYPE_V128:
222
- case TCG_TYPE_V256:
222
- case TCG_TYPE_V256:
223
- not_op = INDEX_op_not_vec;
223
- not_op = INDEX_op_not_vec;
224
- have_not = TCG_TARGET_HAS_not_vec;
224
- have_not = TCG_TARGET_HAS_not_vec;
225
- break;
225
- break;
226
- default:
226
- default:
227
- g_assert_not_reached();
227
- g_assert_not_reached();
228
- }
228
- }
229
- if (!have_not) {
229
- if (!have_not) {
230
- break;
230
- break;
231
- }
231
- }
232
- op->opc = not_op;
232
- op->opc = not_op;
233
- reset_temp(op->args[0]);
233
- reset_temp(op->args[0]);
234
- op->args[1] = op->args[i];
234
- op->args[1] = op->args[i];
235
- continue;
235
- continue;
236
- }
236
- }
237
default:
237
default:
238
break;
238
break;
239
}
239
}
240
--
240
--
241
2.25.1
241
2.25.1
242
242
243
243
1
Even though there is only one user, place this more complex
1
Even though there is only one user, place this more complex
2
conversion into its own helper.
2
conversion into its own helper.
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/optimize.c | 89 ++++++++++++++++++++++++++------------------------
7
tcg/optimize.c | 89 ++++++++++++++++++++++++++------------------------
8
1 file changed, 47 insertions(+), 42 deletions(-)
8
1 file changed, 47 insertions(+), 42 deletions(-)
9
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
15
15
16
static bool fold_neg(OptContext *ctx, TCGOp *op)
16
static bool fold_neg(OptContext *ctx, TCGOp *op)
17
{
17
{
18
- return fold_const1(ctx, op);
18
- return fold_const1(ctx, op);
19
+ if (fold_const1(ctx, op)) {
19
+ if (fold_const1(ctx, op)) {
20
+ return true;
20
+ return true;
21
+ }
21
+ }
22
+ /*
22
+ /*
23
+ * Because of fold_sub_to_neg, we want to always return true,
23
+ * Because of fold_sub_to_neg, we want to always return true,
24
+ * via finish_folding.
24
+ * via finish_folding.
25
+ */
25
+ */
26
+ finish_folding(ctx, op);
26
+ finish_folding(ctx, op);
27
+ return true;
27
+ return true;
28
}
28
}
29
29
30
static bool fold_nor(OptContext *ctx, TCGOp *op)
30
static bool fold_nor(OptContext *ctx, TCGOp *op)
31
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
31
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
32
return fold_const2(ctx, op);
32
return fold_const2(ctx, op);
33
}
33
}
34
34
35
+static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
35
+static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
36
+{
36
+{
37
+ TCGOpcode neg_op;
37
+ TCGOpcode neg_op;
38
+ bool have_neg;
38
+ bool have_neg;
39
+
39
+
40
+ if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
40
+ if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
41
+ return false;
41
+ return false;
42
+ }
42
+ }
43
+
43
+
44
+ switch (ctx->type) {
44
+ switch (ctx->type) {
45
+ case TCG_TYPE_I32:
45
+ case TCG_TYPE_I32:
46
+ neg_op = INDEX_op_neg_i32;
46
+ neg_op = INDEX_op_neg_i32;
47
+ have_neg = TCG_TARGET_HAS_neg_i32;
47
+ have_neg = TCG_TARGET_HAS_neg_i32;
48
+ break;
48
+ break;
49
+ case TCG_TYPE_I64:
49
+ case TCG_TYPE_I64:
50
+ neg_op = INDEX_op_neg_i64;
50
+ neg_op = INDEX_op_neg_i64;
51
+ have_neg = TCG_TARGET_HAS_neg_i64;
51
+ have_neg = TCG_TARGET_HAS_neg_i64;
52
+ break;
52
+ break;
53
+ case TCG_TYPE_V64:
53
+ case TCG_TYPE_V64:
54
+ case TCG_TYPE_V128:
54
+ case TCG_TYPE_V128:
55
+ case TCG_TYPE_V256:
55
+ case TCG_TYPE_V256:
56
+ neg_op = INDEX_op_neg_vec;
56
+ neg_op = INDEX_op_neg_vec;
57
+ have_neg = (TCG_TARGET_HAS_neg_vec &&
57
+ have_neg = (TCG_TARGET_HAS_neg_vec &&
58
+ tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
58
+ tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
59
+ break;
59
+ break;
60
+ default:
60
+ default:
61
+ g_assert_not_reached();
61
+ g_assert_not_reached();
62
+ }
62
+ }
63
+ if (have_neg) {
63
+ if (have_neg) {
64
+ op->opc = neg_op;
64
+ op->opc = neg_op;
65
+ op->args[1] = op->args[2];
65
+ op->args[1] = op->args[2];
66
+ return fold_neg(ctx, op);
66
+ return fold_neg(ctx, op);
67
+ }
67
+ }
68
+ return false;
68
+ return false;
69
+}
69
+}
70
+
70
+
71
static bool fold_sub(OptContext *ctx, TCGOp *op)
71
static bool fold_sub(OptContext *ctx, TCGOp *op)
72
{
72
{
73
if (fold_const2(ctx, op) ||
73
if (fold_const2(ctx, op) ||
74
- fold_xx_to_i(ctx, op, 0)) {
74
- fold_xx_to_i(ctx, op, 0)) {
75
+ fold_xx_to_i(ctx, op, 0) ||
75
+ fold_xx_to_i(ctx, op, 0) ||
76
+ fold_sub_to_neg(ctx, op)) {
76
+ fold_sub_to_neg(ctx, op)) {
77
return true;
77
return true;
78
}
78
}
79
return false;
79
return false;
80
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
81
continue;
81
continue;
82
}
82
}
83
break;
83
break;
84
- CASE_OP_32_64_VEC(sub):
84
- CASE_OP_32_64_VEC(sub):
85
- {
85
- {
86
- TCGOpcode neg_op;
86
- TCGOpcode neg_op;
87
- bool have_neg;
87
- bool have_neg;
88
-
88
-
89
- if (arg_is_const(op->args[2])) {
89
- if (arg_is_const(op->args[2])) {
90
- /* Proceed with possible constant folding. */
90
- /* Proceed with possible constant folding. */
91
- break;
91
- break;
92
- }
92
- }
93
- switch (ctx.type) {
93
- switch (ctx.type) {
94
- case TCG_TYPE_I32:
94
- case TCG_TYPE_I32:
95
- neg_op = INDEX_op_neg_i32;
95
- neg_op = INDEX_op_neg_i32;
96
- have_neg = TCG_TARGET_HAS_neg_i32;
96
- have_neg = TCG_TARGET_HAS_neg_i32;
97
- break;
97
- break;
98
- case TCG_TYPE_I64:
98
- case TCG_TYPE_I64:
99
- neg_op = INDEX_op_neg_i64;
99
- neg_op = INDEX_op_neg_i64;
100
- have_neg = TCG_TARGET_HAS_neg_i64;
100
- have_neg = TCG_TARGET_HAS_neg_i64;
101
- break;
101
- break;
102
- case TCG_TYPE_V64:
102
- case TCG_TYPE_V64:
103
- case TCG_TYPE_V128:
103
- case TCG_TYPE_V128:
104
- case TCG_TYPE_V256:
104
- case TCG_TYPE_V256:
105
- neg_op = INDEX_op_neg_vec;
105
- neg_op = INDEX_op_neg_vec;
106
- have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
106
- have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
107
- TCGOP_VECE(op)) > 0;
107
- TCGOP_VECE(op)) > 0;
108
- break;
108
- break;
109
- default:
109
- default:
110
- g_assert_not_reached();
110
- g_assert_not_reached();
111
- }
111
- }
112
- if (!have_neg) {
112
- if (!have_neg) {
113
- break;
113
- break;
114
- }
114
- }
115
- if (arg_is_const(op->args[1])
115
- if (arg_is_const(op->args[1])
116
- && arg_info(op->args[1])->val == 0) {
116
- && arg_info(op->args[1])->val == 0) {
117
- op->opc = neg_op;
117
- op->opc = neg_op;
118
- reset_temp(op->args[0]);
118
- reset_temp(op->args[0]);
119
- op->args[1] = op->args[2];
119
- op->args[1] = op->args[2];
120
- continue;
120
- continue;
121
- }
121
- }
122
- }
122
- }
123
- break;
123
- break;
124
default:
124
default:
125
break;
125
break;
126
}
126
}
127
--
127
--
128
2.25.1
128
2.25.1
129
129
130
130
1
Pull the "op r, a, i => mov r, a" optimization into a function,
1
Pull the "op r, a, i => mov r, a" optimization into a function,
2
and use it in the outer-most logical operations.
2
and use it in the outer-most logical operations.
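For context, the cases this helper recognizes are the usual identity elements of the integer ops. The standalone C sanity check below is illustrative only and not part of the patch:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t a = 0xfedcba9876543210u;

    assert((a + 0) == a);                    /* add a, 0  -> a */
    assert((a - 0) == a);                    /* sub a, 0  -> a */
    assert((a | 0) == a);                    /* or  a, 0  -> a */
    assert((a ^ 0) == a);                    /* xor a, 0  -> a */
    assert((a << 0) == a && (a >> 0) == a);  /* shl/shr a, 0 -> a */
    assert((a & ~UINT64_C(0)) == a);         /* and a, -1 -> a */
    assert(~(a ^ ~UINT64_C(0)) == a);        /* eqv a, -1 -> a */
    assert((a | ~(~UINT64_C(0))) == a);      /* orc a, -1 -> a */

    printf("identity-element folds hold\n");
    return 0;
}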
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/optimize.c | 61 +++++++++++++++++++++-----------------------------
7
tcg/optimize.c | 61 +++++++++++++++++++++-----------------------------
8
1 file changed, 26 insertions(+), 35 deletions(-)
8
1 file changed, 26 insertions(+), 35 deletions(-)
9
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
15
return false;
15
return false;
16
}
16
}
17
17
18
+/* If the binary operation has second argument @i, fold to identity. */
18
+/* If the binary operation has second argument @i, fold to identity. */
19
+static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
19
+static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
20
+{
20
+{
21
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
21
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
23
+ }
23
+ }
24
+ return false;
24
+ return false;
25
+}
25
+}
26
+
26
+
27
/* If the binary operation has second argument @i, fold to NOT. */
27
/* If the binary operation has second argument @i, fold to NOT. */
28
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
28
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
29
{
29
{
30
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
30
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
31
31
32
static bool fold_add(OptContext *ctx, TCGOp *op)
32
static bool fold_add(OptContext *ctx, TCGOp *op)
33
{
33
{
34
- return fold_const2(ctx, op);
34
- return fold_const2(ctx, op);
35
+ if (fold_const2(ctx, op) ||
35
+ if (fold_const2(ctx, op) ||
36
+ fold_xi_to_x(ctx, op, 0)) {
36
+ fold_xi_to_x(ctx, op, 0)) {
37
+ return true;
37
+ return true;
38
+ }
38
+ }
39
+ return false;
39
+ return false;
40
}
40
}
41
41
42
static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
42
static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
43
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
43
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
44
{
44
{
45
if (fold_const2(ctx, op) ||
45
if (fold_const2(ctx, op) ||
46
fold_xi_to_i(ctx, op, 0) ||
46
fold_xi_to_i(ctx, op, 0) ||
47
+ fold_xi_to_x(ctx, op, -1) ||
47
+ fold_xi_to_x(ctx, op, -1) ||
48
fold_xx_to_x(ctx, op)) {
48
fold_xx_to_x(ctx, op)) {
49
return true;
49
return true;
50
}
50
}
51
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
51
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
52
{
52
{
53
if (fold_const2(ctx, op) ||
53
if (fold_const2(ctx, op) ||
54
fold_xx_to_i(ctx, op, 0) ||
54
fold_xx_to_i(ctx, op, 0) ||
55
+ fold_xi_to_x(ctx, op, 0) ||
55
+ fold_xi_to_x(ctx, op, 0) ||
56
fold_ix_to_not(ctx, op, -1)) {
56
fold_ix_to_not(ctx, op, -1)) {
57
return true;
57
return true;
58
}
58
}
59
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
59
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
60
static bool fold_eqv(OptContext *ctx, TCGOp *op)
60
static bool fold_eqv(OptContext *ctx, TCGOp *op)
61
{
61
{
62
if (fold_const2(ctx, op) ||
62
if (fold_const2(ctx, op) ||
63
+ fold_xi_to_x(ctx, op, -1) ||
63
+ fold_xi_to_x(ctx, op, -1) ||
64
fold_xi_to_not(ctx, op, 0)) {
64
fold_xi_to_not(ctx, op, 0)) {
65
return true;
65
return true;
66
}
66
}
67
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
67
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
68
static bool fold_or(OptContext *ctx, TCGOp *op)
68
static bool fold_or(OptContext *ctx, TCGOp *op)
69
{
69
{
70
if (fold_const2(ctx, op) ||
70
if (fold_const2(ctx, op) ||
71
+ fold_xi_to_x(ctx, op, 0) ||
71
+ fold_xi_to_x(ctx, op, 0) ||
72
fold_xx_to_x(ctx, op)) {
72
fold_xx_to_x(ctx, op)) {
73
return true;
73
return true;
74
}
74
}
75
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
75
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
76
static bool fold_orc(OptContext *ctx, TCGOp *op)
76
static bool fold_orc(OptContext *ctx, TCGOp *op)
77
{
77
{
78
if (fold_const2(ctx, op) ||
78
if (fold_const2(ctx, op) ||
79
+ fold_xi_to_x(ctx, op, -1) ||
79
+ fold_xi_to_x(ctx, op, -1) ||
80
fold_ix_to_not(ctx, op, 0)) {
80
fold_ix_to_not(ctx, op, 0)) {
81
return true;
81
return true;
82
}
82
}
83
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
83
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
84
84
85
static bool fold_shift(OptContext *ctx, TCGOp *op)
85
static bool fold_shift(OptContext *ctx, TCGOp *op)
86
{
86
{
87
- return fold_const2(ctx, op);
87
- return fold_const2(ctx, op);
88
+ if (fold_const2(ctx, op) ||
88
+ if (fold_const2(ctx, op) ||
89
+ fold_xi_to_x(ctx, op, 0)) {
89
+ fold_xi_to_x(ctx, op, 0)) {
90
+ return true;
90
+ return true;
91
+ }
91
+ }
92
+ return false;
92
+ return false;
93
}
93
}
94
94
95
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
95
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
96
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
96
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
97
{
97
{
98
if (fold_const2(ctx, op) ||
98
if (fold_const2(ctx, op) ||
99
fold_xx_to_i(ctx, op, 0) ||
99
fold_xx_to_i(ctx, op, 0) ||
100
+ fold_xi_to_x(ctx, op, 0) ||
100
+ fold_xi_to_x(ctx, op, 0) ||
101
fold_sub_to_neg(ctx, op)) {
101
fold_sub_to_neg(ctx, op)) {
102
return true;
102
return true;
103
}
103
}
104
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
104
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
105
{
105
{
106
if (fold_const2(ctx, op) ||
106
if (fold_const2(ctx, op) ||
107
fold_xx_to_i(ctx, op, 0) ||
107
fold_xx_to_i(ctx, op, 0) ||
108
+ fold_xi_to_x(ctx, op, 0) ||
108
+ fold_xi_to_x(ctx, op, 0) ||
109
fold_xi_to_not(ctx, op, -1)) {
109
fold_xi_to_not(ctx, op, -1)) {
110
return true;
110
return true;
111
}
111
}
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
break;
113
break;
114
}
114
}
115
115
116
- /* Simplify expression for "op r, a, const => mov r, a" cases */
116
- /* Simplify expression for "op r, a, const => mov r, a" cases */
117
- switch (opc) {
117
- switch (opc) {
118
- CASE_OP_32_64_VEC(add):
118
- CASE_OP_32_64_VEC(add):
119
- CASE_OP_32_64_VEC(sub):
119
- CASE_OP_32_64_VEC(sub):
120
- CASE_OP_32_64_VEC(or):
120
- CASE_OP_32_64_VEC(or):
121
- CASE_OP_32_64_VEC(xor):
121
- CASE_OP_32_64_VEC(xor):
122
- CASE_OP_32_64_VEC(andc):
122
- CASE_OP_32_64_VEC(andc):
123
- CASE_OP_32_64(shl):
123
- CASE_OP_32_64(shl):
124
- CASE_OP_32_64(shr):
124
- CASE_OP_32_64(shr):
125
- CASE_OP_32_64(sar):
125
- CASE_OP_32_64(sar):
126
- CASE_OP_32_64(rotl):
126
- CASE_OP_32_64(rotl):
127
- CASE_OP_32_64(rotr):
127
- CASE_OP_32_64(rotr):
128
- if (!arg_is_const(op->args[1])
128
- if (!arg_is_const(op->args[1])
129
- && arg_is_const(op->args[2])
129
- && arg_is_const(op->args[2])
130
- && arg_info(op->args[2])->val == 0) {
130
- && arg_info(op->args[2])->val == 0) {
131
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
131
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
132
- continue;
132
- continue;
133
- }
133
- }
134
- break;
134
- break;
135
- CASE_OP_32_64_VEC(and):
135
- CASE_OP_32_64_VEC(and):
136
- CASE_OP_32_64_VEC(orc):
136
- CASE_OP_32_64_VEC(orc):
137
- CASE_OP_32_64(eqv):
137
- CASE_OP_32_64(eqv):
138
- if (!arg_is_const(op->args[1])
138
- if (!arg_is_const(op->args[1])
139
- && arg_is_const(op->args[2])
139
- && arg_is_const(op->args[2])
140
- && arg_info(op->args[2])->val == -1) {
140
- && arg_info(op->args[2])->val == -1) {
141
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
141
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
142
- continue;
142
- continue;
143
- }
143
- }
144
- break;
144
- break;
145
- default:
145
- default:
146
- break;
146
- break;
147
- }
147
- }
148
-
148
-
149
/* Simplify using known-zero bits. Currently only ops with a single
149
/* Simplify using known-zero bits. Currently only ops with a single
150
output argument is supported. */
150
output argument is supported. */
151
z_mask = -1;
151
z_mask = -1;
152
--
152
--
153
2.25.1
153
2.25.1
154
154
155
155
1
Pull the "op r, 0, b => movi r, 0" optimization into a function,
1
Pull the "op r, 0, b => movi r, 0" optimization into a function,
2
and use it in fold_shift.
2
and use it in fold_shift.
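The underlying fact is simply that a constant zero shifted or rotated by any amount stays zero. A standalone C illustration (not part of the patch, with a helper name invented for the example):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t rotl64(uint64_t v, unsigned r)
{
    r &= 63;
    return r ? (v << r) | (v >> (64 - r)) : v;
}

int main(void)
{
    for (unsigned n = 0; n < 64; n++) {
        assert((UINT64_C(0) << n) == 0);   /* shl 0, n -> 0 */
        assert((UINT64_C(0) >> n) == 0);   /* shr 0, n -> 0 */
        assert(rotl64(0, n) == 0);         /* rotl 0, n -> 0 */
    }
    printf("0 shifted or rotated by anything is 0\n");
    return 0;
}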
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 28 ++++++++++------------------
8
tcg/optimize.c | 28 ++++++++++------------------
9
1 file changed, 10 insertions(+), 18 deletions(-)
9
1 file changed, 10 insertions(+), 18 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
16
return false;
16
return false;
17
}
17
}
18
18
19
+/* If the binary operation has first argument @i, fold to @i. */
19
+/* If the binary operation has first argument @i, fold to @i. */
20
+static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
20
+static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
21
+{
21
+{
22
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
22
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
24
+ }
24
+ }
25
+ return false;
25
+ return false;
26
+}
26
+}
27
+
27
+
28
/* If the binary operation has first argument @i, fold to NOT. */
28
/* If the binary operation has first argument @i, fold to NOT. */
29
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
29
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
30
{
30
{
31
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
31
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
32
static bool fold_shift(OptContext *ctx, TCGOp *op)
32
static bool fold_shift(OptContext *ctx, TCGOp *op)
33
{
33
{
34
if (fold_const2(ctx, op) ||
34
if (fold_const2(ctx, op) ||
35
+ fold_ix_to_i(ctx, op, 0) ||
35
+ fold_ix_to_i(ctx, op, 0) ||
36
fold_xi_to_x(ctx, op, 0)) {
36
fold_xi_to_x(ctx, op, 0)) {
37
return true;
37
return true;
38
}
38
}
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
40
break;
40
break;
41
}
41
}
42
42
43
- /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
43
- /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
44
- and "sub r, 0, a => neg r, a" case. */
44
- and "sub r, 0, a => neg r, a" case. */
45
- switch (opc) {
45
- switch (opc) {
46
- CASE_OP_32_64(shl):
46
- CASE_OP_32_64(shl):
47
- CASE_OP_32_64(shr):
47
- CASE_OP_32_64(shr):
48
- CASE_OP_32_64(sar):
48
- CASE_OP_32_64(sar):
49
- CASE_OP_32_64(rotl):
49
- CASE_OP_32_64(rotl):
50
- CASE_OP_32_64(rotr):
50
- CASE_OP_32_64(rotr):
51
- if (arg_is_const(op->args[1])
51
- if (arg_is_const(op->args[1])
52
- && arg_info(op->args[1])->val == 0) {
52
- && arg_info(op->args[1])->val == 0) {
53
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
53
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
54
- continue;
54
- continue;
55
- }
55
- }
56
- break;
56
- break;
57
- default:
57
- default:
58
- break;
58
- break;
59
- }
59
- }
60
-
60
-
61
/* Simplify using known-zero bits. Currently only ops with a single
61
/* Simplify using known-zero bits. Currently only ops with a single
62
output argument is supported. */
62
output argument is supported. */
63
z_mask = -1;
63
z_mask = -1;
64
--
64
--
65
2.25.1
65
2.25.1
66
66
67
67
1
Move all of the known-zero optimizations into the per-opcode
1
Move all of the known-zero optimizations into the per-opcode
2
functions. Use fold_masks when there is a possibility of the
2
functions. Use fold_masks when there is a possibility of the
3
result being determined, and simply set ctx->z_mask otherwise.
3
result being determined, and simply set ctx->z_mask otherwise.
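To illustrate the idea behind the masks (this is a standalone sketch, not part of the patch): z_mask has a bit set iff the corresponding result bit may be 1, so for AND the per-operand masks simply intersect. A small C check, with the values chosen only for the example:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t z1 = 0xff;          /* first operand known to fit in 8 bits  */
    uint64_t z2 = 0x0f;          /* second operand known to fit in 4 bits */
    uint64_t z_and = z1 & z2;    /* a result bit can be 1 only if both inputs allow it */

    /* Any concrete values respecting the input masks stay within the combined mask. */
    for (uint64_t a = 0; a <= 0xff; a++) {
        for (uint64_t b = 0; b <= 0x0f; b++) {
            assert(((a & b) & ~z_and) == 0);
        }
    }
    printf("z_mask for AND: %#llx\n", (unsigned long long)z_and);
    return 0;
}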
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
tcg/optimize.c | 545 ++++++++++++++++++++++++++-----------------------
9
tcg/optimize.c | 545 ++++++++++++++++++++++++++-----------------------
10
1 file changed, 294 insertions(+), 251 deletions(-)
10
1 file changed, 294 insertions(+), 251 deletions(-)
11
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
16
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
17
TCGTempSet temps_used;
17
TCGTempSet temps_used;
18
18
19
/* In flight values from optimization. */
19
/* In flight values from optimization. */
20
- uint64_t z_mask;
20
- uint64_t z_mask;
21
+ uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
21
+ uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
22
+ uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
22
+ uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
23
TCGType type;
23
TCGType type;
24
} OptContext;
24
} OptContext;
25
25
26
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
26
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
27
return false;
27
return false;
28
}
28
}
29
29
30
+static bool fold_masks(OptContext *ctx, TCGOp *op)
30
+static bool fold_masks(OptContext *ctx, TCGOp *op)
31
+{
31
+{
32
+ uint64_t a_mask = ctx->a_mask;
32
+ uint64_t a_mask = ctx->a_mask;
33
+ uint64_t z_mask = ctx->z_mask;
33
+ uint64_t z_mask = ctx->z_mask;
34
+
34
+
35
+ /*
35
+ /*
36
+ * 32-bit ops generate 32-bit results. For the result is zero test
36
+ * 32-bit ops generate 32-bit results. For the result is zero test
37
+ * below, we can ignore high bits, but for further optimizations we
37
+ * below, we can ignore high bits, but for further optimizations we
38
+ * need to record that the high bits contain garbage.
38
+ * need to record that the high bits contain garbage.
39
+ */
39
+ */
40
+ if (ctx->type == TCG_TYPE_I32) {
40
+ if (ctx->type == TCG_TYPE_I32) {
41
+ ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
41
+ ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
42
+ a_mask &= MAKE_64BIT_MASK(0, 32);
42
+ a_mask &= MAKE_64BIT_MASK(0, 32);
43
+ z_mask &= MAKE_64BIT_MASK(0, 32);
43
+ z_mask &= MAKE_64BIT_MASK(0, 32);
44
+ }
44
+ }
45
+
45
+
46
+ if (z_mask == 0) {
46
+ if (z_mask == 0) {
47
+ return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
47
+ return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
48
+ }
48
+ }
49
+ if (a_mask == 0) {
49
+ if (a_mask == 0) {
50
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
50
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
51
+ }
51
+ }
52
+ return false;
52
+ return false;
53
+}
53
+}
54
+
54
+
55
/*
55
/*
56
* Convert @op to NOT, if NOT is supported by the host.
56
* Convert @op to NOT, if NOT is supported by the host.
57
* Return true f the conversion is successful, which will still
57
* Return true f the conversion is successful, which will still
58
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
58
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
59
59
60
static bool fold_and(OptContext *ctx, TCGOp *op)
60
static bool fold_and(OptContext *ctx, TCGOp *op)
61
{
61
{
62
+ uint64_t z1, z2;
62
+ uint64_t z1, z2;
63
+
63
+
64
if (fold_const2(ctx, op) ||
64
if (fold_const2(ctx, op) ||
65
fold_xi_to_i(ctx, op, 0) ||
65
fold_xi_to_i(ctx, op, 0) ||
66
fold_xi_to_x(ctx, op, -1) ||
66
fold_xi_to_x(ctx, op, -1) ||
67
fold_xx_to_x(ctx, op)) {
67
fold_xx_to_x(ctx, op)) {
68
return true;
68
return true;
69
}
69
}
70
- return false;
70
- return false;
71
+
71
+
72
+ z1 = arg_info(op->args[1])->z_mask;
72
+ z1 = arg_info(op->args[1])->z_mask;
73
+ z2 = arg_info(op->args[2])->z_mask;
73
+ z2 = arg_info(op->args[2])->z_mask;
74
+ ctx->z_mask = z1 & z2;
74
+ ctx->z_mask = z1 & z2;
75
+
75
+
76
+ /*
76
+ /*
77
+ * Known-zeros does not imply known-ones. Therefore unless
77
+ * Known-zeros does not imply known-ones. Therefore unless
78
+ * arg2 is constant, we can't infer affected bits from it.
78
+ * arg2 is constant, we can't infer affected bits from it.
79
+ */
79
+ */
80
+ if (arg_is_const(op->args[2])) {
80
+ if (arg_is_const(op->args[2])) {
81
+ ctx->a_mask = z1 & ~z2;
81
+ ctx->a_mask = z1 & ~z2;
82
+ }
82
+ }
83
+
83
+
84
+ return fold_masks(ctx, op);
84
+ return fold_masks(ctx, op);
85
}
85
}
86
86
87
static bool fold_andc(OptContext *ctx, TCGOp *op)
87
static bool fold_andc(OptContext *ctx, TCGOp *op)
88
{
88
{
89
+ uint64_t z1;
89
+ uint64_t z1;
90
+
90
+
91
if (fold_const2(ctx, op) ||
91
if (fold_const2(ctx, op) ||
92
fold_xx_to_i(ctx, op, 0) ||
92
fold_xx_to_i(ctx, op, 0) ||
93
fold_xi_to_x(ctx, op, 0) ||
93
fold_xi_to_x(ctx, op, 0) ||
94
fold_ix_to_not(ctx, op, -1)) {
94
fold_ix_to_not(ctx, op, -1)) {
95
return true;
95
return true;
96
}
96
}
97
- return false;
97
- return false;
98
+
98
+
99
+ z1 = arg_info(op->args[1])->z_mask;
99
+ z1 = arg_info(op->args[1])->z_mask;
100
+
100
+
101
+ /*
101
+ /*
102
+ * Known-zeros does not imply known-ones. Therefore unless
102
+ * Known-zeros does not imply known-ones. Therefore unless
103
+ * arg2 is constant, we can't infer anything from it.
103
+ * arg2 is constant, we can't infer anything from it.
104
+ */
104
+ */
105
+ if (arg_is_const(op->args[2])) {
105
+ if (arg_is_const(op->args[2])) {
106
+ uint64_t z2 = ~arg_info(op->args[2])->z_mask;
106
+ uint64_t z2 = ~arg_info(op->args[2])->z_mask;
107
+ ctx->a_mask = z1 & ~z2;
107
+ ctx->a_mask = z1 & ~z2;
108
+ z1 &= z2;
108
+ z1 &= z2;
109
+ }
109
+ }
110
+ ctx->z_mask = z1;
110
+ ctx->z_mask = z1;
111
+
111
+
112
+ return fold_masks(ctx, op);
112
+ return fold_masks(ctx, op);
113
}
113
}
114
114
115
static bool fold_brcond(OptContext *ctx, TCGOp *op)
115
static bool fold_brcond(OptContext *ctx, TCGOp *op)
116
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
116
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
117
117
118
static bool fold_bswap(OptContext *ctx, TCGOp *op)
118
static bool fold_bswap(OptContext *ctx, TCGOp *op)
119
{
119
{
120
+ uint64_t z_mask, sign;
120
+ uint64_t z_mask, sign;
121
+
121
+
122
if (arg_is_const(op->args[1])) {
122
if (arg_is_const(op->args[1])) {
123
uint64_t t = arg_info(op->args[1])->val;
123
uint64_t t = arg_info(op->args[1])->val;
124
124
125
t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
125
t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
126
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
126
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
127
}
127
}
128
- return false;
128
- return false;
129
+
129
+
130
+ z_mask = arg_info(op->args[1])->z_mask;
130
+ z_mask = arg_info(op->args[1])->z_mask;
131
+ switch (op->opc) {
131
+ switch (op->opc) {
132
+ case INDEX_op_bswap16_i32:
132
+ case INDEX_op_bswap16_i32:
133
+ case INDEX_op_bswap16_i64:
133
+ case INDEX_op_bswap16_i64:
134
+ z_mask = bswap16(z_mask);
134
+ z_mask = bswap16(z_mask);
135
+ sign = INT16_MIN;
135
+ sign = INT16_MIN;
136
+ break;
136
+ break;
137
+ case INDEX_op_bswap32_i32:
137
+ case INDEX_op_bswap32_i32:
138
+ case INDEX_op_bswap32_i64:
138
+ case INDEX_op_bswap32_i64:
139
+ z_mask = bswap32(z_mask);
139
+ z_mask = bswap32(z_mask);
140
+ sign = INT32_MIN;
140
+ sign = INT32_MIN;
141
+ break;
141
+ break;
142
+ case INDEX_op_bswap64_i64:
142
+ case INDEX_op_bswap64_i64:
143
+ z_mask = bswap64(z_mask);
143
+ z_mask = bswap64(z_mask);
144
+ sign = INT64_MIN;
144
+ sign = INT64_MIN;
145
+ break;
145
+ break;
146
+ default:
146
+ default:
147
+ g_assert_not_reached();
147
+ g_assert_not_reached();
148
+ }
148
+ }
149
+
149
+
150
+ switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
150
+ switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
151
+ case TCG_BSWAP_OZ:
151
+ case TCG_BSWAP_OZ:
152
+ break;
152
+ break;
153
+ case TCG_BSWAP_OS:
153
+ case TCG_BSWAP_OS:
154
+ /* If the sign bit may be 1, force all the bits above to 1. */
154
+ /* If the sign bit may be 1, force all the bits above to 1. */
155
+ if (z_mask & sign) {
155
+ if (z_mask & sign) {
156
+ z_mask |= sign;
156
+ z_mask |= sign;
157
+ }
157
+ }
158
+ break;
158
+ break;
159
+ default:
159
+ default:
160
+ /* The high bits are undefined: force all bits above the sign to 1. */
160
+ /* The high bits are undefined: force all bits above the sign to 1. */
161
+ z_mask |= sign << 1;
161
+ z_mask |= sign << 1;
162
+ break;
162
+ break;
163
+ }
163
+ }
164
+ ctx->z_mask = z_mask;
164
+ ctx->z_mask = z_mask;
165
+
165
+
166
+ return fold_masks(ctx, op);
166
+ return fold_masks(ctx, op);
167
}
167
}
168
168
169
static bool fold_call(OptContext *ctx, TCGOp *op)
169
static bool fold_call(OptContext *ctx, TCGOp *op)
170
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
170
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
171
171
172
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
172
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
173
{
173
{
174
+ uint64_t z_mask;
174
+ uint64_t z_mask;
175
+
175
+
176
if (arg_is_const(op->args[1])) {
176
if (arg_is_const(op->args[1])) {
177
uint64_t t = arg_info(op->args[1])->val;
177
uint64_t t = arg_info(op->args[1])->val;
178
178
179
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
179
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
180
}
180
}
181
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
181
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
182
}
182
}
183
+
183
+
184
+ switch (ctx->type) {
184
+ switch (ctx->type) {
185
+ case TCG_TYPE_I32:
185
+ case TCG_TYPE_I32:
186
+ z_mask = 31;
186
+ z_mask = 31;
187
+ break;
187
+ break;
188
+ case TCG_TYPE_I64:
188
+ case TCG_TYPE_I64:
189
+ z_mask = 63;
189
+ z_mask = 63;
190
+ break;
190
+ break;
191
+ default:
191
+ default:
192
+ g_assert_not_reached();
192
+ g_assert_not_reached();
193
+ }
193
+ }
194
+ ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
194
+ ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
195
+
195
+
196
return false;
196
return false;
197
}
197
}
198
198
199
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
199
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
200
{
200
{
201
- return fold_const1(ctx, op);
201
- return fold_const1(ctx, op);
202
+ if (fold_const1(ctx, op)) {
202
+ if (fold_const1(ctx, op)) {
203
+ return true;
203
+ return true;
204
+ }
204
+ }
205
+
205
+
206
+ switch (ctx->type) {
206
+ switch (ctx->type) {
207
+ case TCG_TYPE_I32:
207
+ case TCG_TYPE_I32:
208
+ ctx->z_mask = 32 | 31;
208
+ ctx->z_mask = 32 | 31;
209
+ break;
209
+ break;
210
+ case TCG_TYPE_I64:
210
+ case TCG_TYPE_I64:
211
+ ctx->z_mask = 64 | 63;
211
+ ctx->z_mask = 64 | 63;
212
+ break;
212
+ break;
213
+ default:
213
+ default:
214
+ g_assert_not_reached();
214
+ g_assert_not_reached();
215
+ }
215
+ }
216
+ return false;
216
+ return false;
217
}
217
}
218
218
219
static bool fold_deposit(OptContext *ctx, TCGOp *op)
219
static bool fold_deposit(OptContext *ctx, TCGOp *op)
220
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
220
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
221
t1 = deposit64(t1, op->args[3], op->args[4], t2);
221
t1 = deposit64(t1, op->args[3], op->args[4], t2);
222
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
222
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
223
}
223
}
224
+
224
+
225
+ ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
225
+ ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
226
+ op->args[3], op->args[4],
226
+ op->args[3], op->args[4],
227
+ arg_info(op->args[2])->z_mask);
227
+ arg_info(op->args[2])->z_mask);
228
return false;
228
return false;
229
}
229
}
230
230
231
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
231
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
232
232
233
static bool fold_extract(OptContext *ctx, TCGOp *op)
233
static bool fold_extract(OptContext *ctx, TCGOp *op)
234
{
234
{
235
+ uint64_t z_mask_old, z_mask;
235
+ uint64_t z_mask_old, z_mask;
236
+
236
+
237
if (arg_is_const(op->args[1])) {
237
if (arg_is_const(op->args[1])) {
238
uint64_t t;
238
uint64_t t;
239
239
240
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
240
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
241
t = extract64(t, op->args[2], op->args[3]);
241
t = extract64(t, op->args[2], op->args[3]);
242
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
242
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
243
}
243
}
244
- return false;
244
- return false;
245
+
245
+
246
+ z_mask_old = arg_info(op->args[1])->z_mask;
246
+ z_mask_old = arg_info(op->args[1])->z_mask;
247
+ z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
247
+ z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
248
+ if (op->args[2] == 0) {
248
+ if (op->args[2] == 0) {
249
+ ctx->a_mask = z_mask_old ^ z_mask;
249
+ ctx->a_mask = z_mask_old ^ z_mask;
250
+ }
250
+ }
251
+ ctx->z_mask = z_mask;
251
+ ctx->z_mask = z_mask;
252
+
252
+
253
+ return fold_masks(ctx, op);
253
+ return fold_masks(ctx, op);
254
}
254
}
255
255
256
static bool fold_extract2(OptContext *ctx, TCGOp *op)
256
static bool fold_extract2(OptContext *ctx, TCGOp *op)
257
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
257
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
258
258
259
static bool fold_exts(OptContext *ctx, TCGOp *op)
259
static bool fold_exts(OptContext *ctx, TCGOp *op)
260
{
260
{
261
- return fold_const1(ctx, op);
261
- return fold_const1(ctx, op);
262
+ uint64_t z_mask_old, z_mask, sign;
262
+ uint64_t z_mask_old, z_mask, sign;
263
+ bool type_change = false;
263
+ bool type_change = false;
264
+
264
+
265
+ if (fold_const1(ctx, op)) {
265
+ if (fold_const1(ctx, op)) {
266
+ return true;
266
+ return true;
267
+ }
267
+ }
268
+
268
+
269
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
269
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
270
+
270
+
271
+ switch (op->opc) {
271
+ switch (op->opc) {
272
+ CASE_OP_32_64(ext8s):
272
+ CASE_OP_32_64(ext8s):
273
+ sign = INT8_MIN;
273
+ sign = INT8_MIN;
274
+ z_mask = (uint8_t)z_mask;
274
+ z_mask = (uint8_t)z_mask;
275
+ break;
275
+ break;
276
+ CASE_OP_32_64(ext16s):
276
+ CASE_OP_32_64(ext16s):
277
+ sign = INT16_MIN;
277
+ sign = INT16_MIN;
278
+ z_mask = (uint16_t)z_mask;
278
+ z_mask = (uint16_t)z_mask;
279
+ break;
279
+ break;
280
+ case INDEX_op_ext_i32_i64:
280
+ case INDEX_op_ext_i32_i64:
281
+ type_change = true;
281
+ type_change = true;
282
+ QEMU_FALLTHROUGH;
282
+ QEMU_FALLTHROUGH;
283
+ case INDEX_op_ext32s_i64:
283
+ case INDEX_op_ext32s_i64:
284
+ sign = INT32_MIN;
284
+ sign = INT32_MIN;
285
+ z_mask = (uint32_t)z_mask;
285
+ z_mask = (uint32_t)z_mask;
286
+ break;
286
+ break;
287
+ default:
287
+ default:
288
+ g_assert_not_reached();
288
+ g_assert_not_reached();
289
+ }
289
+ }
290
+
290
+
291
+ if (z_mask & sign) {
291
+ if (z_mask & sign) {
292
+ z_mask |= sign;
292
+ z_mask |= sign;
293
+ } else if (!type_change) {
293
+ } else if (!type_change) {
294
+ ctx->a_mask = z_mask_old ^ z_mask;
294
+ ctx->a_mask = z_mask_old ^ z_mask;
295
+ }
295
+ }
296
+ ctx->z_mask = z_mask;
296
+ ctx->z_mask = z_mask;
297
+
297
+
298
+ return fold_masks(ctx, op);
298
+ return fold_masks(ctx, op);
299
}
299
}
300
300
301
static bool fold_extu(OptContext *ctx, TCGOp *op)
301
static bool fold_extu(OptContext *ctx, TCGOp *op)
302
{
302
{
303
- return fold_const1(ctx, op);
303
- return fold_const1(ctx, op);
304
+ uint64_t z_mask_old, z_mask;
304
+ uint64_t z_mask_old, z_mask;
305
+ bool type_change = false;
305
+ bool type_change = false;
306
+
306
+
307
+ if (fold_const1(ctx, op)) {
307
+ if (fold_const1(ctx, op)) {
308
+ return true;
308
+ return true;
309
+ }
309
+ }
310
+
310
+
311
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
311
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
312
+
312
+
313
+ switch (op->opc) {
313
+ switch (op->opc) {
314
+ CASE_OP_32_64(ext8u):
314
+ CASE_OP_32_64(ext8u):
315
+ z_mask = (uint8_t)z_mask;
315
+ z_mask = (uint8_t)z_mask;
316
+ break;
316
+ break;
317
+ CASE_OP_32_64(ext16u):
317
+ CASE_OP_32_64(ext16u):
318
+ z_mask = (uint16_t)z_mask;
318
+ z_mask = (uint16_t)z_mask;
319
+ break;
319
+ break;
320
+ case INDEX_op_extrl_i64_i32:
320
+ case INDEX_op_extrl_i64_i32:
321
+ case INDEX_op_extu_i32_i64:
321
+ case INDEX_op_extu_i32_i64:
322
+ type_change = true;
322
+ type_change = true;
323
+ QEMU_FALLTHROUGH;
323
+ QEMU_FALLTHROUGH;
324
+ case INDEX_op_ext32u_i64:
324
+ case INDEX_op_ext32u_i64:
325
+ z_mask = (uint32_t)z_mask;
325
+ z_mask = (uint32_t)z_mask;
326
+ break;
326
+ break;
327
+ case INDEX_op_extrh_i64_i32:
327
+ case INDEX_op_extrh_i64_i32:
328
+ type_change = true;
328
+ type_change = true;
329
+ z_mask >>= 32;
329
+ z_mask >>= 32;
330
+ break;
330
+ break;
331
+ default:
331
+ default:
332
+ g_assert_not_reached();
332
+ g_assert_not_reached();
333
+ }
333
+ }
334
+
334
+
335
+ ctx->z_mask = z_mask;
335
+ ctx->z_mask = z_mask;
336
+ if (!type_change) {
336
+ if (!type_change) {
337
+ ctx->a_mask = z_mask_old ^ z_mask;
337
+ ctx->a_mask = z_mask_old ^ z_mask;
338
+ }
338
+ }
339
+ return fold_masks(ctx, op);
339
+ return fold_masks(ctx, op);
340
}
340
}
341
341
342
static bool fold_mb(OptContext *ctx, TCGOp *op)
342
static bool fold_mb(OptContext *ctx, TCGOp *op)
343
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
343
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
344
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
344
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
345
}
345
}
346
346
347
+ ctx->z_mask = arg_info(op->args[3])->z_mask
347
+ ctx->z_mask = arg_info(op->args[3])->z_mask
348
+ | arg_info(op->args[4])->z_mask;
348
+ | arg_info(op->args[4])->z_mask;
349
+
349
+
350
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
350
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
351
uint64_t tv = arg_info(op->args[3])->val;
351
uint64_t tv = arg_info(op->args[3])->val;
352
uint64_t fv = arg_info(op->args[4])->val;
352
uint64_t fv = arg_info(op->args[4])->val;
353
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
353
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
354
354
355
static bool fold_neg(OptContext *ctx, TCGOp *op)
355
static bool fold_neg(OptContext *ctx, TCGOp *op)
356
{
356
{
357
+ uint64_t z_mask;
357
+ uint64_t z_mask;
358
+
358
+
359
if (fold_const1(ctx, op)) {
359
if (fold_const1(ctx, op)) {
360
return true;
360
return true;
361
}
361
}
362
+
362
+
363
+ /* Set to 1 all bits to the left of the rightmost. */
363
+ /* Set to 1 all bits to the left of the rightmost. */
364
+ z_mask = arg_info(op->args[1])->z_mask;
364
+ z_mask = arg_info(op->args[1])->z_mask;
365
+ ctx->z_mask = -(z_mask & -z_mask);
365
+ ctx->z_mask = -(z_mask & -z_mask);
366
+
366
+
367
/*
367
/*
368
* Because of fold_sub_to_neg, we want to always return true,
368
* Because of fold_sub_to_neg, we want to always return true,
369
* via finish_folding.
369
* via finish_folding.
370
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
370
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
371
fold_xx_to_x(ctx, op)) {
371
fold_xx_to_x(ctx, op)) {
372
return true;
372
return true;
373
}
373
}
374
- return false;
374
- return false;
375
+
375
+
376
+ ctx->z_mask = arg_info(op->args[1])->z_mask
376
+ ctx->z_mask = arg_info(op->args[1])->z_mask
377
+ | arg_info(op->args[2])->z_mask;
377
+ | arg_info(op->args[2])->z_mask;
378
+ return fold_masks(ctx, op);
378
+ return fold_masks(ctx, op);
379
}
379
}
380
380
381
static bool fold_orc(OptContext *ctx, TCGOp *op)
381
static bool fold_orc(OptContext *ctx, TCGOp *op)
382
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
382
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
383
383
384
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
384
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
385
{
385
{
386
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
386
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
387
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
387
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
388
+ MemOp mop = get_memop(oi);
388
+ MemOp mop = get_memop(oi);
389
+ int width = 8 * memop_size(mop);
389
+ int width = 8 * memop_size(mop);
390
+
390
+
391
+ if (!(mop & MO_SIGN) && width < 64) {
391
+ if (!(mop & MO_SIGN) && width < 64) {
392
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
392
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
393
+ }
393
+ }
394
+
394
+
395
/* Opcodes that touch guest memory stop the mb optimization. */
395
/* Opcodes that touch guest memory stop the mb optimization. */
396
ctx->prev_mb = NULL;
396
ctx->prev_mb = NULL;
397
return false;
397
return false;
398
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
398
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
399
if (i >= 0) {
399
if (i >= 0) {
400
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
400
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
401
}
401
}
402
+
402
+
403
+ ctx->z_mask = 1;
403
+ ctx->z_mask = 1;
404
return false;
404
return false;
405
}
405
}
406
406
407
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
407
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
408
op->opc = INDEX_op_setcond_i32;
408
op->opc = INDEX_op_setcond_i32;
409
break;
409
break;
410
}
410
}
411
+
411
+
412
+ ctx->z_mask = 1;
412
+ ctx->z_mask = 1;
413
return false;
413
return false;
414
414
415
do_setcond_const:
415
do_setcond_const:
416
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
416
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
417
417
418
static bool fold_sextract(OptContext *ctx, TCGOp *op)
418
static bool fold_sextract(OptContext *ctx, TCGOp *op)
419
{
419
{
420
+ int64_t z_mask_old, z_mask;
420
+ int64_t z_mask_old, z_mask;
421
+
421
+
422
if (arg_is_const(op->args[1])) {
422
if (arg_is_const(op->args[1])) {
423
uint64_t t;
423
uint64_t t;
424
424
425
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
425
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
426
t = sextract64(t, op->args[2], op->args[3]);
426
t = sextract64(t, op->args[2], op->args[3]);
427
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
427
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
428
}
428
}
429
- return false;
429
- return false;
430
+
430
+
431
+ z_mask_old = arg_info(op->args[1])->z_mask;
431
+ z_mask_old = arg_info(op->args[1])->z_mask;
432
+ z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
432
+ z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
433
+ if (op->args[2] == 0 && z_mask >= 0) {
433
+ if (op->args[2] == 0 && z_mask >= 0) {
434
+ ctx->a_mask = z_mask_old ^ z_mask;
434
+ ctx->a_mask = z_mask_old ^ z_mask;
435
+ }
435
+ }
436
+ ctx->z_mask = z_mask;
436
+ ctx->z_mask = z_mask;
437
+
437
+
438
+ return fold_masks(ctx, op);
438
+ return fold_masks(ctx, op);
439
}
439
}
440
440
441
static bool fold_shift(OptContext *ctx, TCGOp *op)
441
static bool fold_shift(OptContext *ctx, TCGOp *op)
442
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
442
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
443
fold_xi_to_x(ctx, op, 0)) {
443
fold_xi_to_x(ctx, op, 0)) {
444
return true;
444
return true;
445
}
445
}
446
+
446
+
447
+ if (arg_is_const(op->args[2])) {
447
+ if (arg_is_const(op->args[2])) {
448
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type,
448
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type,
449
+ arg_info(op->args[1])->z_mask,
449
+ arg_info(op->args[1])->z_mask,
450
+ arg_info(op->args[2])->val);
450
+ arg_info(op->args[2])->val);
451
+ return fold_masks(ctx, op);
451
+ return fold_masks(ctx, op);
452
+ }
452
+ }
453
return false;
453
return false;
454
}
454
}
455
455
456
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
456
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
457
return fold_addsub2_i32(ctx, op, false);
457
return fold_addsub2_i32(ctx, op, false);
458
}
458
}
459
459
460
+static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
460
+static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
461
+{
461
+{
462
+ /* We can't do any folding with a load, but we can record bits. */
462
+ /* We can't do any folding with a load, but we can record bits. */
463
+ switch (op->opc) {
463
+ switch (op->opc) {
464
+ CASE_OP_32_64(ld8u):
464
+ CASE_OP_32_64(ld8u):
465
+ ctx->z_mask = MAKE_64BIT_MASK(0, 8);
465
+ ctx->z_mask = MAKE_64BIT_MASK(0, 8);
466
+ break;
466
+ break;
467
+ CASE_OP_32_64(ld16u):
467
+ CASE_OP_32_64(ld16u):
468
+ ctx->z_mask = MAKE_64BIT_MASK(0, 16);
468
+ ctx->z_mask = MAKE_64BIT_MASK(0, 16);
469
+ break;
469
+ break;
470
+ case INDEX_op_ld32u_i64:
470
+ case INDEX_op_ld32u_i64:
471
+ ctx->z_mask = MAKE_64BIT_MASK(0, 32);
471
+ ctx->z_mask = MAKE_64BIT_MASK(0, 32);
472
+ break;
472
+ break;
473
+ default:
473
+ default:
474
+ g_assert_not_reached();
474
+ g_assert_not_reached();
475
+ }
475
+ }
476
+ return false;
476
+ return false;
477
+}
477
+}
478
+
478
+
479
static bool fold_xor(OptContext *ctx, TCGOp *op)
479
static bool fold_xor(OptContext *ctx, TCGOp *op)
480
{
480
{
481
if (fold_const2(ctx, op) ||
481
if (fold_const2(ctx, op) ||
482
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
482
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
483
fold_xi_to_not(ctx, op, -1)) {
483
fold_xi_to_not(ctx, op, -1)) {
484
return true;
484
return true;
485
}
485
}
486
- return false;
486
- return false;
487
+
487
+
488
+ ctx->z_mask = arg_info(op->args[1])->z_mask
488
+ ctx->z_mask = arg_info(op->args[1])->z_mask
489
+ | arg_info(op->args[2])->z_mask;
489
+ | arg_info(op->args[2])->z_mask;
490
+ return fold_masks(ctx, op);
490
+ return fold_masks(ctx, op);
491
}
491
}
492
492
493
/* Propagate constants and copies, fold constant expressions. */
493
/* Propagate constants and copies, fold constant expressions. */
494
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
494
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
495
}
495
}
496
496
497
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
497
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
498
- uint64_t z_mask, partmask, affected, tmp;
498
- uint64_t z_mask, partmask, affected, tmp;
499
TCGOpcode opc = op->opc;
499
TCGOpcode opc = op->opc;
500
const TCGOpDef *def;
500
const TCGOpDef *def;
501
bool done = false;
501
bool done = false;
502
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
502
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
503
break;
503
break;
504
}
504
}
505
505
506
- /* Simplify using known-zero bits. Currently only ops with a single
506
- /* Simplify using known-zero bits. Currently only ops with a single
507
- output argument is supported. */
507
- output argument is supported. */
508
- z_mask = -1;
508
- z_mask = -1;
509
- affected = -1;
509
- affected = -1;
510
- switch (opc) {
510
- switch (opc) {
511
- CASE_OP_32_64(ext8s):
511
- CASE_OP_32_64(ext8s):
512
- if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
512
- if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
513
- break;
513
- break;
514
- }
514
- }
515
- QEMU_FALLTHROUGH;
515
- QEMU_FALLTHROUGH;
516
- CASE_OP_32_64(ext8u):
516
- CASE_OP_32_64(ext8u):
517
- z_mask = 0xff;
517
- z_mask = 0xff;
518
- goto and_const;
518
- goto and_const;
519
- CASE_OP_32_64(ext16s):
519
- CASE_OP_32_64(ext16s):
520
- if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
520
- if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
521
- break;
521
- break;
522
- }
522
- }
523
- QEMU_FALLTHROUGH;
523
- QEMU_FALLTHROUGH;
524
- CASE_OP_32_64(ext16u):
524
- CASE_OP_32_64(ext16u):
525
- z_mask = 0xffff;
525
- z_mask = 0xffff;
526
- goto and_const;
526
- goto and_const;
527
- case INDEX_op_ext32s_i64:
527
- case INDEX_op_ext32s_i64:
528
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
528
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
529
- break;
529
- break;
530
- }
530
- }
531
- QEMU_FALLTHROUGH;
531
- QEMU_FALLTHROUGH;
532
- case INDEX_op_ext32u_i64:
532
- case INDEX_op_ext32u_i64:
533
- z_mask = 0xffffffffU;
533
- z_mask = 0xffffffffU;
534
- goto and_const;
534
- goto and_const;
535
-
535
-
536
- CASE_OP_32_64(and):
536
- CASE_OP_32_64(and):
537
- z_mask = arg_info(op->args[2])->z_mask;
537
- z_mask = arg_info(op->args[2])->z_mask;
538
- if (arg_is_const(op->args[2])) {
538
- if (arg_is_const(op->args[2])) {
539
- and_const:
539
- and_const:
540
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
540
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
541
- }
541
- }
542
- z_mask = arg_info(op->args[1])->z_mask & z_mask;
542
- z_mask = arg_info(op->args[1])->z_mask & z_mask;
543
- break;
543
- break;
544
-
544
-
545
- case INDEX_op_ext_i32_i64:
545
- case INDEX_op_ext_i32_i64:
546
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
546
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
547
- break;
547
- break;
548
- }
548
- }
549
- QEMU_FALLTHROUGH;
549
- QEMU_FALLTHROUGH;
550
- case INDEX_op_extu_i32_i64:
550
- case INDEX_op_extu_i32_i64:
551
- /* We do not compute affected as it is a size changing op. */
551
- /* We do not compute affected as it is a size changing op. */
552
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
552
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
553
- break;
553
- break;
554
-
554
-
555
- CASE_OP_32_64(andc):
555
- CASE_OP_32_64(andc):
556
- /* Known-zeros does not imply known-ones. Therefore unless
556
- /* Known-zeros does not imply known-ones. Therefore unless
557
- op->args[2] is constant, we can't infer anything from it. */
557
- op->args[2] is constant, we can't infer anything from it. */
558
- if (arg_is_const(op->args[2])) {
558
- if (arg_is_const(op->args[2])) {
559
- z_mask = ~arg_info(op->args[2])->z_mask;
559
- z_mask = ~arg_info(op->args[2])->z_mask;
560
- goto and_const;
560
- goto and_const;
561
- }
561
- }
562
- /* But we certainly know nothing outside args[1] may be set. */
562
- /* But we certainly know nothing outside args[1] may be set. */
563
- z_mask = arg_info(op->args[1])->z_mask;
563
- z_mask = arg_info(op->args[1])->z_mask;
564
- break;
564
- break;
565
-
565
-
566
- case INDEX_op_sar_i32:
566
- case INDEX_op_sar_i32:
567
- if (arg_is_const(op->args[2])) {
567
- if (arg_is_const(op->args[2])) {
568
- tmp = arg_info(op->args[2])->val & 31;
568
- tmp = arg_info(op->args[2])->val & 31;
569
- z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
569
- z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
570
- }
570
- }
571
- break;
571
- break;
572
- case INDEX_op_sar_i64:
572
- case INDEX_op_sar_i64:
573
- if (arg_is_const(op->args[2])) {
573
- if (arg_is_const(op->args[2])) {
574
- tmp = arg_info(op->args[2])->val & 63;
574
- tmp = arg_info(op->args[2])->val & 63;
575
- z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
575
- z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
576
- }
576
- }
577
- break;
577
- break;
578
-
578
-
579
- case INDEX_op_shr_i32:
579
- case INDEX_op_shr_i32:
580
- if (arg_is_const(op->args[2])) {
580
- if (arg_is_const(op->args[2])) {
581
- tmp = arg_info(op->args[2])->val & 31;
581
- tmp = arg_info(op->args[2])->val & 31;
582
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
582
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
583
- }
583
- }
584
- break;
584
- break;
585
- case INDEX_op_shr_i64:
585
- case INDEX_op_shr_i64:
586
- if (arg_is_const(op->args[2])) {
586
- if (arg_is_const(op->args[2])) {
587
- tmp = arg_info(op->args[2])->val & 63;
587
- tmp = arg_info(op->args[2])->val & 63;
588
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
588
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
589
- }
589
- }
590
- break;
590
- break;
591
-
591
-
592
- case INDEX_op_extrl_i64_i32:
592
- case INDEX_op_extrl_i64_i32:
593
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
593
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
594
- break;
594
- break;
595
- case INDEX_op_extrh_i64_i32:
595
- case INDEX_op_extrh_i64_i32:
596
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
596
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
597
- break;
597
- break;
598
-
598
-
599
- CASE_OP_32_64(shl):
599
- CASE_OP_32_64(shl):
600
- if (arg_is_const(op->args[2])) {
600
- if (arg_is_const(op->args[2])) {
601
- tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
601
- tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
602
- z_mask = arg_info(op->args[1])->z_mask << tmp;
602
- z_mask = arg_info(op->args[1])->z_mask << tmp;
603
- }
603
- }
604
- break;
604
- break;
605
-
605
-
606
- CASE_OP_32_64(neg):
606
- CASE_OP_32_64(neg):
607
- /* Set to 1 all bits to the left of the rightmost. */
607
- /* Set to 1 all bits to the left of the rightmost. */
608
- z_mask = -(arg_info(op->args[1])->z_mask
608
- z_mask = -(arg_info(op->args[1])->z_mask
609
- & -arg_info(op->args[1])->z_mask);
609
- & -arg_info(op->args[1])->z_mask);
610
- break;
610
- break;
611
-
611
-
612
- CASE_OP_32_64(deposit):
612
- CASE_OP_32_64(deposit):
613
- z_mask = deposit64(arg_info(op->args[1])->z_mask,
613
- z_mask = deposit64(arg_info(op->args[1])->z_mask,
614
- op->args[3], op->args[4],
614
- op->args[3], op->args[4],
615
- arg_info(op->args[2])->z_mask);
615
- arg_info(op->args[2])->z_mask);
616
- break;
616
- break;
617
-
617
-
618
- CASE_OP_32_64(extract):
618
- CASE_OP_32_64(extract):
619
- z_mask = extract64(arg_info(op->args[1])->z_mask,
619
- z_mask = extract64(arg_info(op->args[1])->z_mask,
620
- op->args[2], op->args[3]);
620
- op->args[2], op->args[3]);
621
- if (op->args[2] == 0) {
621
- if (op->args[2] == 0) {
622
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
622
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
623
- }
623
- }
624
- break;
624
- break;
625
- CASE_OP_32_64(sextract):
625
- CASE_OP_32_64(sextract):
626
- z_mask = sextract64(arg_info(op->args[1])->z_mask,
626
- z_mask = sextract64(arg_info(op->args[1])->z_mask,
627
- op->args[2], op->args[3]);
627
- op->args[2], op->args[3]);
628
- if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
628
- if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
629
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
629
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
630
- }
630
- }
631
- break;
631
- break;
632
-
632
-
633
- CASE_OP_32_64(or):
633
- CASE_OP_32_64(or):
634
- CASE_OP_32_64(xor):
634
- CASE_OP_32_64(xor):
635
- z_mask = arg_info(op->args[1])->z_mask
635
- z_mask = arg_info(op->args[1])->z_mask
636
- | arg_info(op->args[2])->z_mask;
636
- | arg_info(op->args[2])->z_mask;
637
- break;
637
- break;
638
-
638
-
639
- case INDEX_op_clz_i32:
639
- case INDEX_op_clz_i32:
640
- case INDEX_op_ctz_i32:
640
- case INDEX_op_ctz_i32:
641
- z_mask = arg_info(op->args[2])->z_mask | 31;
641
- z_mask = arg_info(op->args[2])->z_mask | 31;
642
- break;
642
- break;
643
-
643
-
644
- case INDEX_op_clz_i64:
644
- case INDEX_op_clz_i64:
645
- case INDEX_op_ctz_i64:
645
- case INDEX_op_ctz_i64:
646
- z_mask = arg_info(op->args[2])->z_mask | 63;
646
- z_mask = arg_info(op->args[2])->z_mask | 63;
647
- break;
647
- break;
648
-
648
-
649
- case INDEX_op_ctpop_i32:
649
- case INDEX_op_ctpop_i32:
650
- z_mask = 32 | 31;
650
- z_mask = 32 | 31;
651
- break;
651
- break;
652
- case INDEX_op_ctpop_i64:
652
- case INDEX_op_ctpop_i64:
653
- z_mask = 64 | 63;
653
- z_mask = 64 | 63;
654
- break;
654
- break;
655
-
655
-
656
- CASE_OP_32_64(setcond):
656
- CASE_OP_32_64(setcond):
657
- case INDEX_op_setcond2_i32:
657
- case INDEX_op_setcond2_i32:
658
- z_mask = 1;
658
- z_mask = 1;
659
- break;
659
- break;
660
-
660
-
661
- CASE_OP_32_64(movcond):
661
- CASE_OP_32_64(movcond):
662
- z_mask = arg_info(op->args[3])->z_mask
662
- z_mask = arg_info(op->args[3])->z_mask
663
- | arg_info(op->args[4])->z_mask;
663
- | arg_info(op->args[4])->z_mask;
664
- break;
664
- break;
665
-
665
-
666
- CASE_OP_32_64(ld8u):
666
- CASE_OP_32_64(ld8u):
667
- z_mask = 0xff;
667
- z_mask = 0xff;
668
- break;
668
- break;
669
- CASE_OP_32_64(ld16u):
669
- CASE_OP_32_64(ld16u):
670
- z_mask = 0xffff;
670
- z_mask = 0xffff;
671
- break;
671
- break;
672
- case INDEX_op_ld32u_i64:
672
- case INDEX_op_ld32u_i64:
673
- z_mask = 0xffffffffu;
673
- z_mask = 0xffffffffu;
674
- break;
674
- break;
675
-
675
-
676
- CASE_OP_32_64(qemu_ld):
676
- CASE_OP_32_64(qemu_ld):
677
- {
677
- {
678
- MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
678
- MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
679
- MemOp mop = get_memop(oi);
679
- MemOp mop = get_memop(oi);
680
- if (!(mop & MO_SIGN)) {
680
- if (!(mop & MO_SIGN)) {
681
- z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
681
- z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
682
- }
682
- }
683
- }
683
- }
684
- break;
684
- break;
685
-
685
-
686
- CASE_OP_32_64(bswap16):
686
- CASE_OP_32_64(bswap16):
687
- z_mask = arg_info(op->args[1])->z_mask;
687
- z_mask = arg_info(op->args[1])->z_mask;
688
- if (z_mask <= 0xffff) {
688
- if (z_mask <= 0xffff) {
689
- op->args[2] |= TCG_BSWAP_IZ;
689
- op->args[2] |= TCG_BSWAP_IZ;
690
- }
690
- }
691
- z_mask = bswap16(z_mask);
691
- z_mask = bswap16(z_mask);
692
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
692
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
693
- case TCG_BSWAP_OZ:
693
- case TCG_BSWAP_OZ:
694
- break;
694
- break;
695
- case TCG_BSWAP_OS:
695
- case TCG_BSWAP_OS:
696
- z_mask = (int16_t)z_mask;
696
- z_mask = (int16_t)z_mask;
697
- break;
697
- break;
698
- default: /* undefined high bits */
698
- default: /* undefined high bits */
699
- z_mask |= MAKE_64BIT_MASK(16, 48);
699
- z_mask |= MAKE_64BIT_MASK(16, 48);
700
- break;
700
- break;
701
- }
701
- }
702
- break;
702
- break;
703
-
703
-
704
- case INDEX_op_bswap32_i64:
704
- case INDEX_op_bswap32_i64:
705
- z_mask = arg_info(op->args[1])->z_mask;
705
- z_mask = arg_info(op->args[1])->z_mask;
706
- if (z_mask <= 0xffffffffu) {
706
- if (z_mask <= 0xffffffffu) {
707
- op->args[2] |= TCG_BSWAP_IZ;
707
- op->args[2] |= TCG_BSWAP_IZ;
708
- }
708
- }
709
- z_mask = bswap32(z_mask);
709
- z_mask = bswap32(z_mask);
710
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
710
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
711
- case TCG_BSWAP_OZ:
711
- case TCG_BSWAP_OZ:
712
- break;
712
- break;
713
- case TCG_BSWAP_OS:
713
- case TCG_BSWAP_OS:
714
- z_mask = (int32_t)z_mask;
714
- z_mask = (int32_t)z_mask;
715
- break;
715
- break;
716
- default: /* undefined high bits */
716
- default: /* undefined high bits */
717
- z_mask |= MAKE_64BIT_MASK(32, 32);
717
- z_mask |= MAKE_64BIT_MASK(32, 32);
718
- break;
718
- break;
719
- }
719
- }
720
- break;
720
- break;
721
-
721
-
722
- default:
722
- default:
723
- break;
723
- break;
724
- }
724
- }
725
-
725
-
726
- /* 32-bit ops generate 32-bit results. For the result is zero test
726
- /* 32-bit ops generate 32-bit results. For the result is zero test
727
- below, we can ignore high bits, but for further optimizations we
727
- below, we can ignore high bits, but for further optimizations we
728
- need to record that the high bits contain garbage. */
728
- need to record that the high bits contain garbage. */
729
- partmask = z_mask;
729
- partmask = z_mask;
730
- if (ctx.type == TCG_TYPE_I32) {
730
- if (ctx.type == TCG_TYPE_I32) {
731
- z_mask |= ~(tcg_target_ulong)0xffffffffu;
731
- z_mask |= ~(tcg_target_ulong)0xffffffffu;
732
- partmask &= 0xffffffffu;
732
- partmask &= 0xffffffffu;
733
- affected &= 0xffffffffu;
733
- affected &= 0xffffffffu;
734
- }
734
- }
735
- ctx.z_mask = z_mask;
735
- ctx.z_mask = z_mask;
736
-
736
-
737
- if (partmask == 0) {
737
- if (partmask == 0) {
738
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
738
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
739
- continue;
739
- continue;
740
- }
740
- }
741
- if (affected == 0) {
741
- if (affected == 0) {
742
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
742
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
743
- continue;
743
- continue;
744
- }
744
- }
745
+ /* Assume all bits affected, and no bits known zero. */
745
+ /* Assume all bits affected, and no bits known zero. */
746
+ ctx.a_mask = -1;
746
+ ctx.a_mask = -1;
747
+ ctx.z_mask = -1;
747
+ ctx.z_mask = -1;
748
748
749
/*
749
/*
750
* Process each opcode.
750
* Process each opcode.
751
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
751
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
752
case INDEX_op_extrh_i64_i32:
752
case INDEX_op_extrh_i64_i32:
753
done = fold_extu(&ctx, op);
753
done = fold_extu(&ctx, op);
754
break;
754
break;
755
+ CASE_OP_32_64(ld8u):
755
+ CASE_OP_32_64(ld8u):
756
+ CASE_OP_32_64(ld16u):
756
+ CASE_OP_32_64(ld16u):
757
+ case INDEX_op_ld32u_i64:
757
+ case INDEX_op_ld32u_i64:
758
+ done = fold_tcg_ld(&ctx, op);
758
+ done = fold_tcg_ld(&ctx, op);
759
+ break;
759
+ break;
760
case INDEX_op_mb:
760
case INDEX_op_mb:
761
done = fold_mb(&ctx, op);
761
done = fold_mb(&ctx, op);
762
break;
762
break;
763
--
763
--
764
2.25.1
764
2.25.1
765
765
766
766
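The patch above moves the known-zero-bit tracking out of the big switch in tcg_optimize() and into the per-opcode fold functions, which now record two masks in the OptContext: z_mask, the bits that may still be nonzero in the result, and a_mask, the bits of the first input that the operation may actually change; fold_masks() then rewrites the op as movi 0 when z_mask is zero and as a plain mov when a_mask is zero. A rough standalone illustration of that bookkeeping for the fold_and() case — this is not QEMU code, and every name in it is invented for the example:

/* Toy model of the z_mask/a_mask bookkeeping; not QEMU code. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* Suppose the optimizer already knows operand 1 fits in 8 bits... */
    uint64_t z1 = 0xff;          /* known-nonzero mask of operand 1 */
    uint64_t z2 = 0xffff;        /* operand 2 is the constant 0xffff */

    /* ...and we are folding "and dst, op1, 0xffff", as fold_and() does. */
    uint64_t a_mask = z1 & ~z2;  /* bits of op1 the AND could clear */
    uint64_t z_mask = z1 & z2;   /* bits that may survive in dst */

    if (z_mask == 0) {
        printf("result known zero: replace with movi dst, 0\n");
    } else if (a_mask == 0) {
        printf("no bit of op1 is affected: replace with mov dst, op1\n");
    } else {
        printf("keep the op; z_mask = %#" PRIx64 "\n", z_mask);
    }
    return 0;
}

Here a_mask is zero because 0xffff covers every bit that could be set in operand 1, so the and degenerates to a copy.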
Rename to fold_multiply2, and handle muls2_i32, mulu2_i64,
and muls2_i64.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 44 +++++++++++++++++++++++++++++++++++---------
1 file changed, 35 insertions(+), 9 deletions(-)

11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
16
return false;
16
return false;
17
}
17
}
18
18
19
-static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
19
-static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
20
+static bool fold_multiply2(OptContext *ctx, TCGOp *op)
20
+static bool fold_multiply2(OptContext *ctx, TCGOp *op)
21
{
21
{
22
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
22
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
23
- uint32_t a = arg_info(op->args[2])->val;
23
- uint32_t a = arg_info(op->args[2])->val;
24
- uint32_t b = arg_info(op->args[3])->val;
24
- uint32_t b = arg_info(op->args[3])->val;
25
- uint64_t r = (uint64_t)a * b;
25
- uint64_t r = (uint64_t)a * b;
26
+ uint64_t a = arg_info(op->args[2])->val;
26
+ uint64_t a = arg_info(op->args[2])->val;
27
+ uint64_t b = arg_info(op->args[3])->val;
27
+ uint64_t b = arg_info(op->args[3])->val;
28
+ uint64_t h, l;
28
+ uint64_t h, l;
29
TCGArg rl, rh;
29
TCGArg rl, rh;
30
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
30
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
31
+ TCGOp *op2;
31
+ TCGOp *op2;
32
+
32
+
33
+ switch (op->opc) {
33
+ switch (op->opc) {
34
+ case INDEX_op_mulu2_i32:
34
+ case INDEX_op_mulu2_i32:
35
+ l = (uint64_t)(uint32_t)a * (uint32_t)b;
35
+ l = (uint64_t)(uint32_t)a * (uint32_t)b;
36
+ h = (int32_t)(l >> 32);
36
+ h = (int32_t)(l >> 32);
37
+ l = (int32_t)l;
37
+ l = (int32_t)l;
38
+ break;
38
+ break;
39
+ case INDEX_op_muls2_i32:
39
+ case INDEX_op_muls2_i32:
40
+ l = (int64_t)(int32_t)a * (int32_t)b;
40
+ l = (int64_t)(int32_t)a * (int32_t)b;
41
+ h = l >> 32;
41
+ h = l >> 32;
42
+ l = (int32_t)l;
42
+ l = (int32_t)l;
43
+ break;
43
+ break;
44
+ case INDEX_op_mulu2_i64:
44
+ case INDEX_op_mulu2_i64:
45
+ mulu64(&l, &h, a, b);
45
+ mulu64(&l, &h, a, b);
46
+ break;
46
+ break;
47
+ case INDEX_op_muls2_i64:
47
+ case INDEX_op_muls2_i64:
48
+ muls64(&l, &h, a, b);
48
+ muls64(&l, &h, a, b);
49
+ break;
49
+ break;
50
+ default:
50
+ default:
51
+ g_assert_not_reached();
51
+ g_assert_not_reached();
52
+ }
52
+ }
53
53
54
rl = op->args[0];
54
rl = op->args[0];
55
rh = op->args[1];
55
rh = op->args[1];
56
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
56
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
57
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
57
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
58
+
58
+
59
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
59
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
60
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
60
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
61
+
61
+
62
+ tcg_opt_gen_movi(ctx, op, rl, l);
62
+ tcg_opt_gen_movi(ctx, op, rl, l);
63
+ tcg_opt_gen_movi(ctx, op2, rh, h);
63
+ tcg_opt_gen_movi(ctx, op2, rh, h);
64
return true;
64
return true;
65
}
65
}
66
return false;
66
return false;
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
CASE_OP_32_64(muluh):
68
CASE_OP_32_64(muluh):
69
done = fold_mul_highpart(&ctx, op);
69
done = fold_mul_highpart(&ctx, op);
70
break;
70
break;
71
- case INDEX_op_mulu2_i32:
71
- case INDEX_op_mulu2_i32:
72
- done = fold_mulu2_i32(&ctx, op);
72
- done = fold_mulu2_i32(&ctx, op);
73
+ CASE_OP_32_64(muls2):
73
+ CASE_OP_32_64(muls2):
74
+ CASE_OP_32_64(mulu2):
74
+ CASE_OP_32_64(mulu2):
75
+ done = fold_multiply2(&ctx, op);
75
+ done = fold_multiply2(&ctx, op);
76
break;
76
break;
77
CASE_OP_32_64(nand):
77
CASE_OP_32_64(nand):
78
done = fold_nand(&ctx, op);
78
done = fold_nand(&ctx, op);
79
--
79
--
80
2.25.1
80
2.25.1
81
81
82
82
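For the unsigned double-word case that fold_multiply2() above now folds, the computation is a 64x64->128-bit multiply split into low and high halves (QEMU uses the mulu64()/muls64() helpers from host-utils for this). A rough standalone equivalent, assuming only a compiler that provides __int128:

/* Standalone sketch of the mulu2 constant fold; assumes __int128 support. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void mul64_to_128(uint64_t *lo, uint64_t *hi, uint64_t a, uint64_t b)
{
    unsigned __int128 r = (unsigned __int128)a * b;
    *lo = (uint64_t)r;
    *hi = (uint64_t)(r >> 64);
}

int main(void)
{
    uint64_t lo, hi;

    /* With both inputs constant, mulu2 folds to two movi operations. */
    mul64_to_128(&lo, &hi, UINT64_MAX, 2);
    printf("movi rl, %#" PRIx64 "; movi rh, %#" PRIx64 "\n", lo, hi);
    return 0;
}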
Rename to fold_addsub2.
Use Int128 to implement the wider operation.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 65 ++++++++++++++++++++++++++++++++++----------------
1 file changed, 44 insertions(+), 21 deletions(-)

12
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@
17
*/
17
*/
18
18
19
#include "qemu/osdep.h"
19
#include "qemu/osdep.h"
20
+#include "qemu/int128.h"
20
+#include "qemu/int128.h"
21
#include "tcg/tcg-op.h"
21
#include "tcg/tcg-op.h"
22
#include "tcg-internal.h"
22
#include "tcg-internal.h"
23
23
24
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
24
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
25
return false;
25
return false;
26
}
26
}
27
27
28
-static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
28
-static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
29
+static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
29
+static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
30
{
30
{
31
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
31
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
32
arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
32
arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
33
- uint32_t al = arg_info(op->args[2])->val;
33
- uint32_t al = arg_info(op->args[2])->val;
34
- uint32_t ah = arg_info(op->args[3])->val;
34
- uint32_t ah = arg_info(op->args[3])->val;
35
- uint32_t bl = arg_info(op->args[4])->val;
35
- uint32_t bl = arg_info(op->args[4])->val;
36
- uint32_t bh = arg_info(op->args[5])->val;
36
- uint32_t bh = arg_info(op->args[5])->val;
37
- uint64_t a = ((uint64_t)ah << 32) | al;
37
- uint64_t a = ((uint64_t)ah << 32) | al;
38
- uint64_t b = ((uint64_t)bh << 32) | bl;
38
- uint64_t b = ((uint64_t)bh << 32) | bl;
39
+ uint64_t al = arg_info(op->args[2])->val;
39
+ uint64_t al = arg_info(op->args[2])->val;
40
+ uint64_t ah = arg_info(op->args[3])->val;
40
+ uint64_t ah = arg_info(op->args[3])->val;
41
+ uint64_t bl = arg_info(op->args[4])->val;
41
+ uint64_t bl = arg_info(op->args[4])->val;
42
+ uint64_t bh = arg_info(op->args[5])->val;
42
+ uint64_t bh = arg_info(op->args[5])->val;
43
TCGArg rl, rh;
43
TCGArg rl, rh;
44
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
44
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
45
+ TCGOp *op2;
45
+ TCGOp *op2;
46
46
47
- if (add) {
47
- if (add) {
48
- a += b;
48
- a += b;
49
+ if (ctx->type == TCG_TYPE_I32) {
49
+ if (ctx->type == TCG_TYPE_I32) {
50
+ uint64_t a = deposit64(al, 32, 32, ah);
50
+ uint64_t a = deposit64(al, 32, 32, ah);
51
+ uint64_t b = deposit64(bl, 32, 32, bh);
51
+ uint64_t b = deposit64(bl, 32, 32, bh);
52
+
52
+
53
+ if (add) {
53
+ if (add) {
54
+ a += b;
54
+ a += b;
55
+ } else {
55
+ } else {
56
+ a -= b;
56
+ a -= b;
57
+ }
57
+ }
58
+
58
+
59
+ al = sextract64(a, 0, 32);
59
+ al = sextract64(a, 0, 32);
60
+ ah = sextract64(a, 32, 32);
60
+ ah = sextract64(a, 32, 32);
61
} else {
61
} else {
62
- a -= b;
62
- a -= b;
63
+ Int128 a = int128_make128(al, ah);
63
+ Int128 a = int128_make128(al, ah);
64
+ Int128 b = int128_make128(bl, bh);
64
+ Int128 b = int128_make128(bl, bh);
65
+
65
+
66
+ if (add) {
66
+ if (add) {
67
+ a = int128_add(a, b);
67
+ a = int128_add(a, b);
68
+ } else {
68
+ } else {
69
+ a = int128_sub(a, b);
69
+ a = int128_sub(a, b);
70
+ }
70
+ }
71
+
71
+
72
+ al = int128_getlo(a);
72
+ al = int128_getlo(a);
73
+ ah = int128_gethi(a);
73
+ ah = int128_gethi(a);
74
}
74
}
75
75
76
rl = op->args[0];
76
rl = op->args[0];
77
rh = op->args[1];
77
rh = op->args[1];
78
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
78
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
79
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
79
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
80
+
80
+
81
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
81
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
82
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
82
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
83
+
83
+
84
+ tcg_opt_gen_movi(ctx, op, rl, al);
84
+ tcg_opt_gen_movi(ctx, op, rl, al);
85
+ tcg_opt_gen_movi(ctx, op2, rh, ah);
85
+ tcg_opt_gen_movi(ctx, op2, rh, ah);
86
return true;
86
return true;
87
}
87
}
88
return false;
88
return false;
89
}
89
}
90
90
91
-static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
91
-static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
92
+static bool fold_add2(OptContext *ctx, TCGOp *op)
92
+static bool fold_add2(OptContext *ctx, TCGOp *op)
93
{
93
{
94
- return fold_addsub2_i32(ctx, op, true);
94
- return fold_addsub2_i32(ctx, op, true);
95
+ return fold_addsub2(ctx, op, true);
95
+ return fold_addsub2(ctx, op, true);
96
}
96
}
97
97
98
static bool fold_and(OptContext *ctx, TCGOp *op)
98
static bool fold_and(OptContext *ctx, TCGOp *op)
99
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
99
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
100
return false;
100
return false;
101
}
101
}
102
102
103
-static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
103
-static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
104
+static bool fold_sub2(OptContext *ctx, TCGOp *op)
104
+static bool fold_sub2(OptContext *ctx, TCGOp *op)
105
{
105
{
106
- return fold_addsub2_i32(ctx, op, false);
106
- return fold_addsub2_i32(ctx, op, false);
107
+ return fold_addsub2(ctx, op, false);
107
+ return fold_addsub2(ctx, op, false);
108
}
108
}
109
109
110
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
110
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
CASE_OP_32_64_VEC(add):
112
CASE_OP_32_64_VEC(add):
113
done = fold_add(&ctx, op);
113
done = fold_add(&ctx, op);
114
break;
114
break;
115
- case INDEX_op_add2_i32:
115
- case INDEX_op_add2_i32:
116
- done = fold_add2_i32(&ctx, op);
116
- done = fold_add2_i32(&ctx, op);
117
+ CASE_OP_32_64(add2):
117
+ CASE_OP_32_64(add2):
118
+ done = fold_add2(&ctx, op);
118
+ done = fold_add2(&ctx, op);
119
break;
119
break;
120
CASE_OP_32_64_VEC(and):
120
CASE_OP_32_64_VEC(and):
121
done = fold_and(&ctx, op);
121
done = fold_and(&ctx, op);
122
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
122
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
123
CASE_OP_32_64_VEC(sub):
123
CASE_OP_32_64_VEC(sub):
124
done = fold_sub(&ctx, op);
124
done = fold_sub(&ctx, op);
125
break;
125
break;
126
- case INDEX_op_sub2_i32:
126
- case INDEX_op_sub2_i32:
127
- done = fold_sub2_i32(&ctx, op);
127
- done = fold_sub2_i32(&ctx, op);
128
+ CASE_OP_32_64(sub2):
128
+ CASE_OP_32_64(sub2):
129
+ done = fold_sub2(&ctx, op);
129
+ done = fold_sub2(&ctx, op);
130
break;
130
break;
131
CASE_OP_32_64_VEC(xor):
131
CASE_OP_32_64_VEC(xor):
132
done = fold_xor(&ctx, op);
132
done = fold_xor(&ctx, op);
133
--
133
--
134
2.25.1
134
2.25.1
135
135
136
136
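The Int128 change above lets fold_addsub2() do the four-constant add2/sub2 fold at full double width instead of being limited to 32-bit pairs. A rough standalone picture of the 64-bit-pair addition involved (QEMU itself uses int128_make128(), int128_add() and friends; this sketch assumes __int128 so it compiles on its own):

/* Standalone sketch of the add2 fold at 128-bit width; assumes __int128. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* add2 inputs: the pair {ah:al} plus the pair {bh:bl}, all constant. */
    uint64_t al = UINT64_MAX, ah = 0;
    uint64_t bl = 1, bh = 0;

    unsigned __int128 a = ((unsigned __int128)ah << 64) | al;
    unsigned __int128 b = ((unsigned __int128)bh << 64) | bl;
    unsigned __int128 r = a + b;

    /* The carry out of the low word shows up in the high result. */
    printf("movi rl, %#" PRIx64 "; movi rh, %#" PRIx64 "\n",
           (uint64_t)r, (uint64_t)(r >> 64));
    return 0;
}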
Most of these are handled by creating a fold_const2_commutative
to handle all of the binary operators. The rest were already
handled on a case-by-case basis in the switch, and have their
own fold function in which to place the call.

We now have only one major switch on TCGOpcode.

Introduce NO_DEST and a block comment for swap_commutative in
order to make the handling of brcond and movcond opcodes cleaner.

Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/optimize.c | 142 ++++++++++++++++++++++++-------------------------
1 file changed, 70 insertions(+), 72 deletions(-)

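The canonicalization this patch centralizes is simple to state: for a commutative operator, put a constant operand in the second slot, so every later fold only needs to check one position. A minimal standalone sketch of that one rule follows — it is not the QEMU implementation, and the real swap_commutative() also swaps when the second operand matches the destination, which is omitted here:

/* Toy constant-to-the-right canonicalization; not QEMU code. */
#include <stdbool.h>
#include <stdio.h>

typedef struct {
    int value;
    bool is_const;
} Arg;

static bool swap_commutative_demo(Arg *p1, Arg *p2)
{
    /* If the first operand is constant and the second is not, swap. */
    if (p1->is_const && !p2->is_const) {
        Arg tmp = *p1;
        *p1 = *p2;
        *p2 = tmp;
        return true;
    }
    return false;
}

int main(void)
{
    Arg x = { 5, true };      /* constant */
    Arg y = { 0, false };     /* non-constant, register-like */

    if (swap_commutative_demo(&x, &y)) {
        printf("swapped: the constant is now the second operand\n");
    }
    return 0;
}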
17
diff --git a/tcg/optimize.c b/tcg/optimize.c
17
diff --git a/tcg/optimize.c b/tcg/optimize.c
18
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
19
--- a/tcg/optimize.c
19
--- a/tcg/optimize.c
20
+++ b/tcg/optimize.c
20
+++ b/tcg/optimize.c
21
@@ -XXX,XX +XXX,XX @@ static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
21
@@ -XXX,XX +XXX,XX @@ static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
22
return -1;
22
return -1;
23
}
23
}
24
24
25
+/**
25
+/**
26
+ * swap_commutative:
26
+ * swap_commutative:
27
+ * @dest: TCGArg of the destination argument, or NO_DEST.
27
+ * @dest: TCGArg of the destination argument, or NO_DEST.
28
+ * @p1: first paired argument
28
+ * @p1: first paired argument
29
+ * @p2: second paired argument
29
+ * @p2: second paired argument
30
+ *
30
+ *
31
+ * If *@p1 is a constant and *@p2 is not, swap.
31
+ * If *@p1 is a constant and *@p2 is not, swap.
32
+ * If *@p2 matches @dest, swap.
32
+ * If *@p2 matches @dest, swap.
33
+ * Return true if a swap was performed.
33
+ * Return true if a swap was performed.
34
+ */
34
+ */
35
+
35
+
36
+#define NO_DEST temp_arg(NULL)
36
+#define NO_DEST temp_arg(NULL)
37
+
37
+
38
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
38
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
39
{
39
{
40
TCGArg a1 = *p1, a2 = *p2;
40
TCGArg a1 = *p1, a2 = *p2;
41
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
41
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
42
return false;
42
return false;
43
}
43
}
44
44
45
+static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
45
+static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
46
+{
46
+{
47
+ swap_commutative(op->args[0], &op->args[1], &op->args[2]);
47
+ swap_commutative(op->args[0], &op->args[1], &op->args[2]);
48
+ return fold_const2(ctx, op);
48
+ return fold_const2(ctx, op);
49
+}
49
+}
50
+
50
+
51
static bool fold_masks(OptContext *ctx, TCGOp *op)
51
static bool fold_masks(OptContext *ctx, TCGOp *op)
52
{
52
{
53
uint64_t a_mask = ctx->a_mask;
53
uint64_t a_mask = ctx->a_mask;
54
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
54
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
55
55
56
static bool fold_add(OptContext *ctx, TCGOp *op)
56
static bool fold_add(OptContext *ctx, TCGOp *op)
57
{
57
{
58
- if (fold_const2(ctx, op) ||
58
- if (fold_const2(ctx, op) ||
59
+ if (fold_const2_commutative(ctx, op) ||
59
+ if (fold_const2_commutative(ctx, op) ||
60
fold_xi_to_x(ctx, op, 0)) {
60
fold_xi_to_x(ctx, op, 0)) {
61
return true;
61
return true;
62
}
62
}
63
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
63
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
64
64
65
static bool fold_add2(OptContext *ctx, TCGOp *op)
65
static bool fold_add2(OptContext *ctx, TCGOp *op)
66
{
66
{
67
+ /* Note that the high and low parts may be independently swapped. */
67
+ /* Note that the high and low parts may be independently swapped. */
68
+ swap_commutative(op->args[0], &op->args[2], &op->args[4]);
68
+ swap_commutative(op->args[0], &op->args[2], &op->args[4]);
69
+ swap_commutative(op->args[1], &op->args[3], &op->args[5]);
69
+ swap_commutative(op->args[1], &op->args[3], &op->args[5]);
70
+
70
+
71
return fold_addsub2(ctx, op, true);
71
return fold_addsub2(ctx, op, true);
72
}
72
}
73
73
74
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
74
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
75
{
75
{
76
uint64_t z1, z2;
76
uint64_t z1, z2;
77
77
78
- if (fold_const2(ctx, op) ||
78
- if (fold_const2(ctx, op) ||
79
+ if (fold_const2_commutative(ctx, op) ||
79
+ if (fold_const2_commutative(ctx, op) ||
80
fold_xi_to_i(ctx, op, 0) ||
80
fold_xi_to_i(ctx, op, 0) ||
81
fold_xi_to_x(ctx, op, -1) ||
81
fold_xi_to_x(ctx, op, -1) ||
82
fold_xx_to_x(ctx, op)) {
82
fold_xx_to_x(ctx, op)) {
83
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
83
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
84
static bool fold_brcond(OptContext *ctx, TCGOp *op)
84
static bool fold_brcond(OptContext *ctx, TCGOp *op)
85
{
85
{
86
TCGCond cond = op->args[2];
86
TCGCond cond = op->args[2];
87
- int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
87
- int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
88
+ int i;
88
+ int i;
89
89
90
+ if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
90
+ if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
91
+ op->args[2] = cond = tcg_swap_cond(cond);
91
+ op->args[2] = cond = tcg_swap_cond(cond);
92
+ }
92
+ }
93
+
93
+
94
+ i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
94
+ i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
95
if (i == 0) {
95
if (i == 0) {
96
tcg_op_remove(ctx->tcg, op);
96
tcg_op_remove(ctx->tcg, op);
97
return true;
97
return true;
98
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
98
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
99
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
99
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
100
{
100
{
101
TCGCond cond = op->args[4];
101
TCGCond cond = op->args[4];
102
- int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
102
- int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
103
TCGArg label = op->args[5];
103
TCGArg label = op->args[5];
104
- int inv = 0;
104
- int inv = 0;
105
+ int i, inv = 0;
105
+ int i, inv = 0;
106
106
107
+ if (swap_commutative2(&op->args[0], &op->args[2])) {
107
+ if (swap_commutative2(&op->args[0], &op->args[2])) {
108
+ op->args[4] = cond = tcg_swap_cond(cond);
108
+ op->args[4] = cond = tcg_swap_cond(cond);
109
+ }
109
+ }
110
+
110
+
111
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
111
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
112
if (i >= 0) {
112
if (i >= 0) {
113
goto do_brcond_const;
113
goto do_brcond_const;
114
}
114
}
115
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
115
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
116
116
117
static bool fold_eqv(OptContext *ctx, TCGOp *op)
117
static bool fold_eqv(OptContext *ctx, TCGOp *op)
118
{
118
{
119
- if (fold_const2(ctx, op) ||
119
- if (fold_const2(ctx, op) ||
120
+ if (fold_const2_commutative(ctx, op) ||
120
+ if (fold_const2_commutative(ctx, op) ||
121
fold_xi_to_x(ctx, op, -1) ||
121
fold_xi_to_x(ctx, op, -1) ||
122
fold_xi_to_not(ctx, op, 0)) {
122
fold_xi_to_not(ctx, op, 0)) {
123
return true;
123
return true;
124
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
124
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
125
static bool fold_movcond(OptContext *ctx, TCGOp *op)
125
static bool fold_movcond(OptContext *ctx, TCGOp *op)
126
{
126
{
127
TCGCond cond = op->args[5];
127
TCGCond cond = op->args[5];
128
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
128
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
129
+ int i;
129
+ int i;
130
130
131
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
131
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
132
+ op->args[5] = cond = tcg_swap_cond(cond);
132
+ op->args[5] = cond = tcg_swap_cond(cond);
133
+ }
133
+ }
134
+ /*
134
+ /*
135
+ * Canonicalize the "false" input reg to match the destination reg so
135
+ * Canonicalize the "false" input reg to match the destination reg so
136
+ * that the tcg backend can implement a "move if true" operation.
136
+ * that the tcg backend can implement a "move if true" operation.
137
+ */
137
+ */
138
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
138
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
139
+ op->args[5] = cond = tcg_invert_cond(cond);
139
+ op->args[5] = cond = tcg_invert_cond(cond);
140
+ }
140
+ }
141
+
141
+
142
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
142
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
143
if (i >= 0) {
143
if (i >= 0) {
144
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
144
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
145
}
145
}
146
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
146
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
147
147
148
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
148
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
149
{
149
{
150
- if (fold_const2(ctx, op) ||
150
- if (fold_const2(ctx, op) ||
151
+ if (fold_const2_commutative(ctx, op) ||
151
+ if (fold_const2_commutative(ctx, op) ||
152
fold_xi_to_i(ctx, op, 0)) {
152
fold_xi_to_i(ctx, op, 0)) {
153
return true;
153
return true;
154
}
154
}
155
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
155
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
156
156
157
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
157
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
158
{
158
{
159
+ swap_commutative(op->args[0], &op->args[2], &op->args[3]);
159
+ swap_commutative(op->args[0], &op->args[2], &op->args[3]);
160
+
160
+
161
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
161
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
162
uint64_t a = arg_info(op->args[2])->val;
162
uint64_t a = arg_info(op->args[2])->val;
163
uint64_t b = arg_info(op->args[3])->val;
163
uint64_t b = arg_info(op->args[3])->val;
164
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
164
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
165
165
166
static bool fold_nand(OptContext *ctx, TCGOp *op)
166
static bool fold_nand(OptContext *ctx, TCGOp *op)
167
{
167
{
168
- if (fold_const2(ctx, op) ||
168
- if (fold_const2(ctx, op) ||
169
+ if (fold_const2_commutative(ctx, op) ||
169
+ if (fold_const2_commutative(ctx, op) ||
170
fold_xi_to_not(ctx, op, -1)) {
170
fold_xi_to_not(ctx, op, -1)) {
171
return true;
171
return true;
172
}
172
}
173
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
173
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
174
174
175
static bool fold_nor(OptContext *ctx, TCGOp *op)
175
static bool fold_nor(OptContext *ctx, TCGOp *op)
176
{
176
{
177
- if (fold_const2(ctx, op) ||
177
- if (fold_const2(ctx, op) ||
178
+ if (fold_const2_commutative(ctx, op) ||
178
+ if (fold_const2_commutative(ctx, op) ||
179
fold_xi_to_not(ctx, op, 0)) {
179
fold_xi_to_not(ctx, op, 0)) {
180
return true;
180
return true;
181
}
181
}
182
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
182
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
183
183
184
static bool fold_or(OptContext *ctx, TCGOp *op)
184
static bool fold_or(OptContext *ctx, TCGOp *op)
185
{
185
{
186
- if (fold_const2(ctx, op) ||
186
- if (fold_const2(ctx, op) ||
187
+ if (fold_const2_commutative(ctx, op) ||
187
+ if (fold_const2_commutative(ctx, op) ||
188
fold_xi_to_x(ctx, op, 0) ||
188
fold_xi_to_x(ctx, op, 0) ||
189
fold_xx_to_x(ctx, op)) {
189
fold_xx_to_x(ctx, op)) {
190
return true;
190
return true;
191
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
191
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
192
static bool fold_setcond(OptContext *ctx, TCGOp *op)
192
static bool fold_setcond(OptContext *ctx, TCGOp *op)
193
{
193
{
194
TCGCond cond = op->args[3];
194
TCGCond cond = op->args[3];
195
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
195
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
196
+ int i;
196
+ int i;
197
197
198
+ if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
198
+ if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
199
+ op->args[3] = cond = tcg_swap_cond(cond);
199
+ op->args[3] = cond = tcg_swap_cond(cond);
200
+ }
200
+ }
201
+
201
+
202
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
202
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
203
if (i >= 0) {
203
if (i >= 0) {
204
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
204
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
205
}
205
}
206
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
206
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
207
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
207
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
208
{
208
{
209
TCGCond cond = op->args[5];
209
TCGCond cond = op->args[5];
210
- int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
210
- int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
211
- int inv = 0;
211
- int inv = 0;
212
+ int i, inv = 0;
212
+ int i, inv = 0;
213
213
214
+ if (swap_commutative2(&op->args[1], &op->args[3])) {
214
+ if (swap_commutative2(&op->args[1], &op->args[3])) {
215
+ op->args[5] = cond = tcg_swap_cond(cond);
215
+ op->args[5] = cond = tcg_swap_cond(cond);
216
+ }
216
+ }
217
+
217
+
218
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
218
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
219
if (i >= 0) {
219
if (i >= 0) {
220
goto do_setcond_const;
220
goto do_setcond_const;
221
}
221
}
222
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
222
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
223
223
224
static bool fold_xor(OptContext *ctx, TCGOp *op)
224
static bool fold_xor(OptContext *ctx, TCGOp *op)
225
{
225
{
226
- if (fold_const2(ctx, op) ||
226
- if (fold_const2(ctx, op) ||
227
+ if (fold_const2_commutative(ctx, op) ||
227
+ if (fold_const2_commutative(ctx, op) ||
228
fold_xx_to_i(ctx, op, 0) ||
228
fold_xx_to_i(ctx, op, 0) ||
229
fold_xi_to_x(ctx, op, 0) ||
229
fold_xi_to_x(ctx, op, 0) ||
230
fold_xi_to_not(ctx, op, -1)) {
230
fold_xi_to_not(ctx, op, -1)) {
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
ctx.type = TCG_TYPE_I32;
232
ctx.type = TCG_TYPE_I32;
233
}
233
}
234
234
235
- /* For commutative operations make constant second argument */
235
- /* For commutative operations make constant second argument */
236
- switch (opc) {
236
- switch (opc) {
237
- CASE_OP_32_64_VEC(add):
237
- CASE_OP_32_64_VEC(add):
238
- CASE_OP_32_64_VEC(mul):
238
- CASE_OP_32_64_VEC(mul):
239
- CASE_OP_32_64_VEC(and):
239
- CASE_OP_32_64_VEC(and):
240
- CASE_OP_32_64_VEC(or):
240
- CASE_OP_32_64_VEC(or):
241
- CASE_OP_32_64_VEC(xor):
241
- CASE_OP_32_64_VEC(xor):
242
- CASE_OP_32_64(eqv):
242
- CASE_OP_32_64(eqv):
243
- CASE_OP_32_64(nand):
243
- CASE_OP_32_64(nand):
244
- CASE_OP_32_64(nor):
244
- CASE_OP_32_64(nor):
245
- CASE_OP_32_64(muluh):
245
- CASE_OP_32_64(muluh):
246
- CASE_OP_32_64(mulsh):
246
- CASE_OP_32_64(mulsh):
247
- swap_commutative(op->args[0], &op->args[1], &op->args[2]);
247
- swap_commutative(op->args[0], &op->args[1], &op->args[2]);
248
- break;
248
- break;
249
- CASE_OP_32_64(brcond):
249
- CASE_OP_32_64(brcond):
250
- if (swap_commutative(-1, &op->args[0], &op->args[1])) {
250
- if (swap_commutative(-1, &op->args[0], &op->args[1])) {
251
- op->args[2] = tcg_swap_cond(op->args[2]);
251
- op->args[2] = tcg_swap_cond(op->args[2]);
252
- }
252
- }
253
- break;
253
- break;
254
- CASE_OP_32_64(setcond):
254
- CASE_OP_32_64(setcond):
255
- if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
255
- if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
256
- op->args[3] = tcg_swap_cond(op->args[3]);
256
- op->args[3] = tcg_swap_cond(op->args[3]);
257
- }
257
- }
258
- break;
258
- break;
259
- CASE_OP_32_64(movcond):
259
- CASE_OP_32_64(movcond):
260
- if (swap_commutative(-1, &op->args[1], &op->args[2])) {
260
- if (swap_commutative(-1, &op->args[1], &op->args[2])) {
261
- op->args[5] = tcg_swap_cond(op->args[5]);
261
- op->args[5] = tcg_swap_cond(op->args[5]);
262
- }
262
- }
263
- /* For movcond, we canonicalize the "false" input reg to match
263
- /* For movcond, we canonicalize the "false" input reg to match
264
- the destination reg so that the tcg backend can implement
264
- the destination reg so that the tcg backend can implement
265
- a "move if true" operation. */
265
- a "move if true" operation. */
266
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
266
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
267
- op->args[5] = tcg_invert_cond(op->args[5]);
267
- op->args[5] = tcg_invert_cond(op->args[5]);
268
- }
268
- }
269
- break;
269
- break;
270
- CASE_OP_32_64(add2):
270
- CASE_OP_32_64(add2):
271
- swap_commutative(op->args[0], &op->args[2], &op->args[4]);
271
- swap_commutative(op->args[0], &op->args[2], &op->args[4]);
272
- swap_commutative(op->args[1], &op->args[3], &op->args[5]);
272
- swap_commutative(op->args[1], &op->args[3], &op->args[5]);
273
- break;
273
- break;
274
- CASE_OP_32_64(mulu2):
274
- CASE_OP_32_64(mulu2):
275
- CASE_OP_32_64(muls2):
275
- CASE_OP_32_64(muls2):
276
- swap_commutative(op->args[0], &op->args[2], &op->args[3]);
276
- swap_commutative(op->args[0], &op->args[2], &op->args[3]);
277
- break;
277
- break;
278
- case INDEX_op_brcond2_i32:
278
- case INDEX_op_brcond2_i32:
279
- if (swap_commutative2(&op->args[0], &op->args[2])) {
279
- if (swap_commutative2(&op->args[0], &op->args[2])) {
280
- op->args[4] = tcg_swap_cond(op->args[4]);
280
- op->args[4] = tcg_swap_cond(op->args[4]);
281
- }
281
- }
282
- break;
282
- break;
283
- case INDEX_op_setcond2_i32:
283
- case INDEX_op_setcond2_i32:
284
- if (swap_commutative2(&op->args[1], &op->args[3])) {
284
- if (swap_commutative2(&op->args[1], &op->args[3])) {
285
- op->args[5] = tcg_swap_cond(op->args[5]);
285
- op->args[5] = tcg_swap_cond(op->args[5]);
286
- }
286
- }
287
- break;
287
- break;
288
- default:
288
- default:
289
- break;
289
- break;
290
- }
290
- }
291
-
291
-
292
/* Assume all bits affected, and no bits known zero. */
292
/* Assume all bits affected, and no bits known zero. */
293
ctx.a_mask = -1;
293
ctx.a_mask = -1;
294
ctx.z_mask = -1;
294
ctx.z_mask = -1;
295
--
295
--
296
2.25.1
296
2.25.1
297
297
298
298
Pretending that the source is i64 when it is in fact i32 is
incorrect; we have type-changing opcodes that must be used.
This bug trips up the subsequent change to the optimizer.

Fixes: 4f2331e5b67a
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/tcg.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -XXX,XX +XXX,XX @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
 
         if (is_32bit) {
             TCGv_i64 temp = tcg_temp_new_i64();
-            TCGv_i64 orig = temp_tcgv_i64(args[i]);
+            TCGv_i32 orig = temp_tcgv_i32(args[i]);
             if (is_signed) {
-                tcg_gen_ext32s_i64(temp, orig);
+                tcg_gen_ext_i32_i64(temp, orig);
             } else {
-                tcg_gen_ext32u_i64(temp, orig);
+                tcg_gen_extu_i32_i64(temp, orig);
             }
             args[i] = tcgv_i64_temp(temp);
         }
--
2.25.1

1
This "garbage" setting pre-dates the addition of the type
1
This "garbage" setting pre-dates the addition of the type
2
changing opcodes INDEX_op_ext_i32_i64, INDEX_op_extu_i32_i64,
2
changing opcodes INDEX_op_ext_i32_i64, INDEX_op_extu_i32_i64,
3
and INDEX_op_extr{l,h}_i64_i32.
3
and INDEX_op_extr{l,h}_i64_i32.
4
4
5
So now we have definitive points at which to adjust z_mask
5
So now we have definitive points at which to adjust z_mask
6
to eliminate such bits from the 32-bit operands.
6
to eliminate such bits from the 32-bit operands.
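
As an illustration only (it mirrors the tcg_opt_gen_movi() hunk below, with an
example value): a 32-bit constant is now canonicalized by sign-extension before
the constant temp is created, so all-ones has a single representation.

    /* Sketch of the new behaviour; 0xffffffff is just an example value. */
    uint64_t val = 0xffffffffull;        /* 32-bit all-ones                 */
    if (ctx->type == TCG_TYPE_I32) {
        val = (int32_t)val;              /* becomes 0xffffffffffffffffull   */
    }
    /* The constant temp, its val and its z_mask now carry the sign-extended
       form, matching how 32-bit constants are represented elsewhere. */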
7
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
11
---
12
tcg/optimize.c | 35 ++++++++++++++++-------------------
12
tcg/optimize.c | 35 ++++++++++++++++-------------------
13
1 file changed, 16 insertions(+), 19 deletions(-)
13
1 file changed, 16 insertions(+), 19 deletions(-)
14
14
15
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/optimize.c
17
--- a/tcg/optimize.c
18
+++ b/tcg/optimize.c
18
+++ b/tcg/optimize.c
19
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
19
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
20
ti->is_const = true;
20
ti->is_const = true;
21
ti->val = ts->val;
21
ti->val = ts->val;
22
ti->z_mask = ts->val;
22
ti->z_mask = ts->val;
23
- if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
23
- if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
24
- /* High bits of a 32-bit quantity are garbage. */
24
- /* High bits of a 32-bit quantity are garbage. */
25
- ti->z_mask |= ~0xffffffffull;
25
- ti->z_mask |= ~0xffffffffull;
26
- }
26
- }
27
} else {
27
} else {
28
ti->is_const = false;
28
ti->is_const = false;
29
ti->z_mask = -1;
29
ti->z_mask = -1;
30
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
30
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
31
TCGTemp *src_ts = arg_temp(src);
31
TCGTemp *src_ts = arg_temp(src);
32
TempOptInfo *di;
32
TempOptInfo *di;
33
TempOptInfo *si;
33
TempOptInfo *si;
34
- uint64_t z_mask;
34
- uint64_t z_mask;
35
TCGOpcode new_op;
35
TCGOpcode new_op;
36
36
37
if (ts_are_copies(dst_ts, src_ts)) {
37
if (ts_are_copies(dst_ts, src_ts)) {
38
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
38
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
39
op->args[0] = dst;
39
op->args[0] = dst;
40
op->args[1] = src;
40
op->args[1] = src;
41
41
42
- z_mask = si->z_mask;
42
- z_mask = si->z_mask;
43
- if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
43
- if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
44
- /* High bits of the destination are now garbage. */
44
- /* High bits of the destination are now garbage. */
45
- z_mask |= ~0xffffffffull;
45
- z_mask |= ~0xffffffffull;
46
- }
46
- }
47
- di->z_mask = z_mask;
47
- di->z_mask = z_mask;
48
+ di->z_mask = si->z_mask;
48
+ di->z_mask = si->z_mask;
49
49
50
if (src_ts->type == dst_ts->type) {
50
if (src_ts->type == dst_ts->type) {
51
TempOptInfo *ni = ts_info(si->next_copy);
51
TempOptInfo *ni = ts_info(si->next_copy);
52
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
52
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
53
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
53
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
54
TCGArg dst, uint64_t val)
54
TCGArg dst, uint64_t val)
55
{
55
{
56
- /* Convert movi to mov with constant temp. */
56
- /* Convert movi to mov with constant temp. */
57
- TCGTemp *tv = tcg_constant_internal(ctx->type, val);
57
- TCGTemp *tv = tcg_constant_internal(ctx->type, val);
58
+ TCGTemp *tv;
58
+ TCGTemp *tv;
59
59
60
+ if (ctx->type == TCG_TYPE_I32) {
60
+ if (ctx->type == TCG_TYPE_I32) {
61
+ val = (int32_t)val;
61
+ val = (int32_t)val;
62
+ }
62
+ }
63
+
63
+
64
+ /* Convert movi to mov with constant temp. */
64
+ /* Convert movi to mov with constant temp. */
65
+ tv = tcg_constant_internal(ctx->type, val);
65
+ tv = tcg_constant_internal(ctx->type, val);
66
init_ts_info(ctx, tv);
66
init_ts_info(ctx, tv);
67
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
67
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
68
}
68
}
69
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
69
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
70
uint64_t z_mask = ctx->z_mask;
70
uint64_t z_mask = ctx->z_mask;
71
71
72
/*
72
/*
73
- * 32-bit ops generate 32-bit results. For the result is zero test
73
- * 32-bit ops generate 32-bit results. For the result is zero test
74
- * below, we can ignore high bits, but for further optimizations we
74
- * below, we can ignore high bits, but for further optimizations we
75
- * need to record that the high bits contain garbage.
75
- * need to record that the high bits contain garbage.
76
+ * 32-bit ops generate 32-bit results, which for the purpose of
76
+ * 32-bit ops generate 32-bit results, which for the purpose of
77
+ * simplifying tcg are sign-extended. Certainly that's how we
77
+ * simplifying tcg are sign-extended. Certainly that's how we
78
+ * represent our constants elsewhere. Note that the bits will
78
+ * represent our constants elsewhere. Note that the bits will
79
+ * be reset properly for a 64-bit value when encountering the
79
+ * be reset properly for a 64-bit value when encountering the
80
+ * type changing opcodes.
80
+ * type changing opcodes.
81
*/
81
*/
82
if (ctx->type == TCG_TYPE_I32) {
82
if (ctx->type == TCG_TYPE_I32) {
83
- ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
83
- ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
84
- a_mask &= MAKE_64BIT_MASK(0, 32);
84
- a_mask &= MAKE_64BIT_MASK(0, 32);
85
- z_mask &= MAKE_64BIT_MASK(0, 32);
85
- z_mask &= MAKE_64BIT_MASK(0, 32);
86
+ a_mask = (int32_t)a_mask;
86
+ a_mask = (int32_t)a_mask;
87
+ z_mask = (int32_t)z_mask;
87
+ z_mask = (int32_t)z_mask;
88
+ ctx->z_mask = z_mask;
88
+ ctx->z_mask = z_mask;
89
}
89
}
90
90
91
if (z_mask == 0) {
91
if (z_mask == 0) {
92
--
92
--
93
2.25.1
93
2.25.1
94
94
95
95
1
Recognize the constant function for or-complement.
1
Recognize the constant function for or-complement.
2
2
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 1 +
8
tcg/optimize.c | 1 +
9
1 file changed, 1 insertion(+)
9
1 file changed, 1 insertion(+)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
16
static bool fold_orc(OptContext *ctx, TCGOp *op)
16
static bool fold_orc(OptContext *ctx, TCGOp *op)
17
{
17
{
18
if (fold_const2(ctx, op) ||
18
if (fold_const2(ctx, op) ||
19
+ fold_xx_to_i(ctx, op, -1) ||
19
+ fold_xx_to_i(ctx, op, -1) ||
20
fold_xi_to_x(ctx, op, -1) ||
20
fold_xi_to_x(ctx, op, -1) ||
21
fold_ix_to_not(ctx, op, 0)) {
21
fold_ix_to_not(ctx, op, 0)) {
22
return true;
22
return true;
23
--
23
--
24
2.25.1
24
2.25.1
25
25
26
26
1
Recognize the identity function for low-part multiply.
1
Recognize the identity function for low-part multiply.
2
2
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 3 ++-
8
tcg/optimize.c | 3 ++-
9
1 file changed, 2 insertions(+), 1 deletion(-)
9
1 file changed, 2 insertions(+), 1 deletion(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
16
static bool fold_mul(OptContext *ctx, TCGOp *op)
16
static bool fold_mul(OptContext *ctx, TCGOp *op)
17
{
17
{
18
if (fold_const2(ctx, op) ||
18
if (fold_const2(ctx, op) ||
19
- fold_xi_to_i(ctx, op, 0)) {
19
- fold_xi_to_i(ctx, op, 0)) {
20
+ fold_xi_to_i(ctx, op, 0) ||
20
+ fold_xi_to_i(ctx, op, 0) ||
21
+ fold_xi_to_x(ctx, op, 1)) {
21
+ fold_xi_to_x(ctx, op, 1)) {
22
return true;
22
return true;
23
}
23
}
24
return false;
24
return false;
25
--
25
--
26
2.25.1
26
2.25.1
27
27
28
28
1
Recognize the identity function for division.
1
Recognize the identity function for division.
2
2
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 6 +++++-
8
tcg/optimize.c | 6 +++++-
9
1 file changed, 5 insertions(+), 1 deletion(-)
9
1 file changed, 5 insertions(+), 1 deletion(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
16
16
17
static bool fold_divide(OptContext *ctx, TCGOp *op)
17
static bool fold_divide(OptContext *ctx, TCGOp *op)
18
{
18
{
19
- return fold_const2(ctx, op);
19
- return fold_const2(ctx, op);
20
+ if (fold_const2(ctx, op) ||
20
+ if (fold_const2(ctx, op) ||
21
+ fold_xi_to_x(ctx, op, 1)) {
21
+ fold_xi_to_x(ctx, op, 1)) {
22
+ return true;
22
+ return true;
23
+ }
23
+ }
24
+ return false;
24
+ return false;
25
}
25
}
26
26
27
static bool fold_dup(OptContext *ctx, TCGOp *op)
27
static bool fold_dup(OptContext *ctx, TCGOp *op)
28
--
28
--
29
2.25.1
29
2.25.1
30
30
31
31
1
Recognize the constant function for remainder.
1
Recognize the constant function for remainder.
2
2
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
6
---
7
tcg/optimize.c | 6 +++++-
7
tcg/optimize.c | 6 +++++-
8
1 file changed, 5 insertions(+), 1 deletion(-)
8
1 file changed, 5 insertions(+), 1 deletion(-)
9
9
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
12
--- a/tcg/optimize.c
13
+++ b/tcg/optimize.c
13
+++ b/tcg/optimize.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
15
15
16
static bool fold_remainder(OptContext *ctx, TCGOp *op)
16
static bool fold_remainder(OptContext *ctx, TCGOp *op)
17
{
17
{
18
- return fold_const2(ctx, op);
18
- return fold_const2(ctx, op);
19
+ if (fold_const2(ctx, op) ||
19
+ if (fold_const2(ctx, op) ||
20
+ fold_xx_to_i(ctx, op, 0)) {
20
+ fold_xx_to_i(ctx, op, 0)) {
21
+ return true;
21
+ return true;
22
+ }
22
+ }
23
+ return false;
23
+ return false;
24
}
24
}
25
25
26
static bool fold_setcond(OptContext *ctx, TCGOp *op)
26
static bool fold_setcond(OptContext *ctx, TCGOp *op)
27
--
27
--
28
2.25.1
28
2.25.1
29
29
30
30
1
Certain targets, like riscv, produce signed 32-bit results.
1
Certain targets, like riscv, produce signed 32-bit results.
2
This can lead to lots of redundant extensions as values are
2
This can lead to lots of redundant extensions as values are
3
manipulated.
3
manipulated.
4
4
5
Begin by tracking only the obvious sign-extensions, and
5
Begin by tracking only the obvious sign-extensions, and
6
converting them to simple copies when possible.
6
converting them to simple copies when possible.
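
For illustration only (this mirrors the smask_from_value() helper added below;
the sample value is made up): s_mask is a left-aligned run of 1s covering the
bits that are guaranteed to be copies of the sign bit.

    #include <inttypes.h>
    #include <stdio.h>

    /* Count leading redundant sign bits, like QEMU's clrsb64(). */
    static int clrsb64_sketch(uint64_t x)
    {
        uint64_t sign = x >> 63;
        int n = 0;
        for (int i = 62; i >= 0 && ((x >> i) & 1) == sign; i--) {
            n++;
        }
        return n;
    }

    /* Same computation as the patch's smask_from_value(). */
    static uint64_t smask_from_value_sketch(uint64_t value)
    {
        return ~(~0ull >> clrsb64_sketch(value));
    }

    int main(void)
    {
        /* 0xffffffff80000000 repeats its sign bit 32 extra times. */
        printf("%016" PRIx64 "\n",
               smask_from_value_sketch(0xffffffff80000000ull));
        /* prints ffffffff00000000 */
        return 0;
    }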
7
7
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
11
---
12
tcg/optimize.c | 123 ++++++++++++++++++++++++++++++++++++++++---------
12
tcg/optimize.c | 123 ++++++++++++++++++++++++++++++++++++++++---------
13
1 file changed, 102 insertions(+), 21 deletions(-)
13
1 file changed, 102 insertions(+), 21 deletions(-)
14
14
15
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/optimize.c
17
--- a/tcg/optimize.c
18
+++ b/tcg/optimize.c
18
+++ b/tcg/optimize.c
19
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
19
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
20
TCGTemp *next_copy;
20
TCGTemp *next_copy;
21
uint64_t val;
21
uint64_t val;
22
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
22
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
23
+ uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
23
+ uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
24
} TempOptInfo;
24
} TempOptInfo;
25
25
26
typedef struct OptContext {
26
typedef struct OptContext {
27
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
27
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
28
/* In flight values from optimization. */
28
/* In flight values from optimization. */
29
uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
29
uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
30
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
30
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
31
+ uint64_t s_mask; /* mask of clrsb(value) bits */
31
+ uint64_t s_mask; /* mask of clrsb(value) bits */
32
TCGType type;
32
TCGType type;
33
} OptContext;
33
} OptContext;
34
34
35
+/* Calculate the smask for a specific value. */
35
+/* Calculate the smask for a specific value. */
36
+static uint64_t smask_from_value(uint64_t value)
36
+static uint64_t smask_from_value(uint64_t value)
37
+{
37
+{
38
+ int rep = clrsb64(value);
38
+ int rep = clrsb64(value);
39
+ return ~(~0ull >> rep);
39
+ return ~(~0ull >> rep);
40
+}
40
+}
41
+
41
+
42
+/*
42
+/*
43
+ * Calculate the smask for a given set of known-zeros.
43
+ * Calculate the smask for a given set of known-zeros.
44
+ * If there are lots of zeros on the left, we can consider the remainder
44
+ * If there are lots of zeros on the left, we can consider the remainder
45
+ * an unsigned field, and thus the corresponding signed field is one bit
45
+ * an unsigned field, and thus the corresponding signed field is one bit
46
+ * larger.
46
+ * larger.
47
+ */
47
+ */
48
+static uint64_t smask_from_zmask(uint64_t zmask)
48
+static uint64_t smask_from_zmask(uint64_t zmask)
49
+{
49
+{
50
+ /*
50
+ /*
51
+ * Only the 0 bits are significant for zmask, thus the msb itself
51
+ * Only the 0 bits are significant for zmask, thus the msb itself
52
+ * must be zero, else we have no sign information.
52
+ * must be zero, else we have no sign information.
53
+ */
53
+ */
54
+ int rep = clz64(zmask);
54
+ int rep = clz64(zmask);
55
+ if (rep == 0) {
55
+ if (rep == 0) {
56
+ return 0;
56
+ return 0;
57
+ }
57
+ }
58
+ rep -= 1;
58
+ rep -= 1;
59
+ return ~(~0ull >> rep);
59
+ return ~(~0ull >> rep);
60
+}
60
+}
61
+
61
+
62
static inline TempOptInfo *ts_info(TCGTemp *ts)
62
static inline TempOptInfo *ts_info(TCGTemp *ts)
63
{
63
{
64
return ts->state_ptr;
64
return ts->state_ptr;
65
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
65
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
66
ti->prev_copy = ts;
66
ti->prev_copy = ts;
67
ti->is_const = false;
67
ti->is_const = false;
68
ti->z_mask = -1;
68
ti->z_mask = -1;
69
+ ti->s_mask = 0;
69
+ ti->s_mask = 0;
70
}
70
}
71
71
72
static void reset_temp(TCGArg arg)
72
static void reset_temp(TCGArg arg)
73
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
73
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
74
ti->is_const = true;
74
ti->is_const = true;
75
ti->val = ts->val;
75
ti->val = ts->val;
76
ti->z_mask = ts->val;
76
ti->z_mask = ts->val;
77
+ ti->s_mask = smask_from_value(ts->val);
77
+ ti->s_mask = smask_from_value(ts->val);
78
} else {
78
} else {
79
ti->is_const = false;
79
ti->is_const = false;
80
ti->z_mask = -1;
80
ti->z_mask = -1;
81
+ ti->s_mask = 0;
81
+ ti->s_mask = 0;
82
}
82
}
83
}
83
}
84
84
85
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
85
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
86
op->args[1] = src;
86
op->args[1] = src;
87
87
88
di->z_mask = si->z_mask;
88
di->z_mask = si->z_mask;
89
+ di->s_mask = si->s_mask;
89
+ di->s_mask = si->s_mask;
90
90
91
if (src_ts->type == dst_ts->type) {
91
if (src_ts->type == dst_ts->type) {
92
TempOptInfo *ni = ts_info(si->next_copy);
92
TempOptInfo *ni = ts_info(si->next_copy);
93
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
93
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
94
94
95
nb_oargs = def->nb_oargs;
95
nb_oargs = def->nb_oargs;
96
for (i = 0; i < nb_oargs; i++) {
96
for (i = 0; i < nb_oargs; i++) {
97
- reset_temp(op->args[i]);
97
- reset_temp(op->args[i]);
98
+ TCGTemp *ts = arg_temp(op->args[i]);
98
+ TCGTemp *ts = arg_temp(op->args[i]);
99
+ reset_ts(ts);
99
+ reset_ts(ts);
100
/*
100
/*
101
- * Save the corresponding known-zero bits mask for the
101
- * Save the corresponding known-zero bits mask for the
102
+ * Save the corresponding known-zero/sign bits mask for the
102
+ * Save the corresponding known-zero/sign bits mask for the
103
* first output argument (only one supported so far).
103
* first output argument (only one supported so far).
104
*/
104
*/
105
if (i == 0) {
105
if (i == 0) {
106
- arg_info(op->args[i])->z_mask = ctx->z_mask;
106
- arg_info(op->args[i])->z_mask = ctx->z_mask;
107
+ ts_info(ts)->z_mask = ctx->z_mask;
107
+ ts_info(ts)->z_mask = ctx->z_mask;
108
+ ts_info(ts)->s_mask = ctx->s_mask;
108
+ ts_info(ts)->s_mask = ctx->s_mask;
109
}
109
}
110
}
110
}
111
}
111
}
112
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
112
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
113
{
113
{
114
uint64_t a_mask = ctx->a_mask;
114
uint64_t a_mask = ctx->a_mask;
115
uint64_t z_mask = ctx->z_mask;
115
uint64_t z_mask = ctx->z_mask;
116
+ uint64_t s_mask = ctx->s_mask;
116
+ uint64_t s_mask = ctx->s_mask;
117
117
118
/*
118
/*
119
* 32-bit ops generate 32-bit results, which for the purpose of
119
* 32-bit ops generate 32-bit results, which for the purpose of
120
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
120
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
121
if (ctx->type == TCG_TYPE_I32) {
121
if (ctx->type == TCG_TYPE_I32) {
122
a_mask = (int32_t)a_mask;
122
a_mask = (int32_t)a_mask;
123
z_mask = (int32_t)z_mask;
123
z_mask = (int32_t)z_mask;
124
+ s_mask |= MAKE_64BIT_MASK(32, 32);
124
+ s_mask |= MAKE_64BIT_MASK(32, 32);
125
ctx->z_mask = z_mask;
125
ctx->z_mask = z_mask;
126
+ ctx->s_mask = s_mask;
126
+ ctx->s_mask = s_mask;
127
}
127
}
128
128
129
if (z_mask == 0) {
129
if (z_mask == 0) {
130
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
130
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
131
131
132
static bool fold_bswap(OptContext *ctx, TCGOp *op)
132
static bool fold_bswap(OptContext *ctx, TCGOp *op)
133
{
133
{
134
- uint64_t z_mask, sign;
134
- uint64_t z_mask, sign;
135
+ uint64_t z_mask, s_mask, sign;
135
+ uint64_t z_mask, s_mask, sign;
136
136
137
if (arg_is_const(op->args[1])) {
137
if (arg_is_const(op->args[1])) {
138
uint64_t t = arg_info(op->args[1])->val;
138
uint64_t t = arg_info(op->args[1])->val;
139
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
139
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
140
}
140
}
141
141
142
z_mask = arg_info(op->args[1])->z_mask;
142
z_mask = arg_info(op->args[1])->z_mask;
143
+
143
+
144
switch (op->opc) {
144
switch (op->opc) {
145
case INDEX_op_bswap16_i32:
145
case INDEX_op_bswap16_i32:
146
case INDEX_op_bswap16_i64:
146
case INDEX_op_bswap16_i64:
147
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
147
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
148
default:
148
default:
149
g_assert_not_reached();
149
g_assert_not_reached();
150
}
150
}
151
+ s_mask = smask_from_zmask(z_mask);
151
+ s_mask = smask_from_zmask(z_mask);
152
152
153
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
153
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
154
case TCG_BSWAP_OZ:
154
case TCG_BSWAP_OZ:
155
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
155
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
156
/* If the sign bit may be 1, force all the bits above to 1. */
156
/* If the sign bit may be 1, force all the bits above to 1. */
157
if (z_mask & sign) {
157
if (z_mask & sign) {
158
z_mask |= sign;
158
z_mask |= sign;
159
+ s_mask = sign << 1;
159
+ s_mask = sign << 1;
160
}
160
}
161
break;
161
break;
162
default:
162
default:
163
/* The high bits are undefined: force all bits above the sign to 1. */
163
/* The high bits are undefined: force all bits above the sign to 1. */
164
z_mask |= sign << 1;
164
z_mask |= sign << 1;
165
+ s_mask = 0;
165
+ s_mask = 0;
166
break;
166
break;
167
}
167
}
168
ctx->z_mask = z_mask;
168
ctx->z_mask = z_mask;
169
+ ctx->s_mask = s_mask;
169
+ ctx->s_mask = s_mask;
170
170
171
return fold_masks(ctx, op);
171
return fold_masks(ctx, op);
172
}
172
}
173
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
173
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
174
static bool fold_extract(OptContext *ctx, TCGOp *op)
174
static bool fold_extract(OptContext *ctx, TCGOp *op)
175
{
175
{
176
uint64_t z_mask_old, z_mask;
176
uint64_t z_mask_old, z_mask;
177
+ int pos = op->args[2];
177
+ int pos = op->args[2];
178
+ int len = op->args[3];
178
+ int len = op->args[3];
179
179
180
if (arg_is_const(op->args[1])) {
180
if (arg_is_const(op->args[1])) {
181
uint64_t t;
181
uint64_t t;
182
182
183
t = arg_info(op->args[1])->val;
183
t = arg_info(op->args[1])->val;
184
- t = extract64(t, op->args[2], op->args[3]);
184
- t = extract64(t, op->args[2], op->args[3]);
185
+ t = extract64(t, pos, len);
185
+ t = extract64(t, pos, len);
186
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
186
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
187
}
187
}
188
188
189
z_mask_old = arg_info(op->args[1])->z_mask;
189
z_mask_old = arg_info(op->args[1])->z_mask;
190
- z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
190
- z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
191
- if (op->args[2] == 0) {
191
- if (op->args[2] == 0) {
192
+ z_mask = extract64(z_mask_old, pos, len);
192
+ z_mask = extract64(z_mask_old, pos, len);
193
+ if (pos == 0) {
193
+ if (pos == 0) {
194
ctx->a_mask = z_mask_old ^ z_mask;
194
ctx->a_mask = z_mask_old ^ z_mask;
195
}
195
}
196
ctx->z_mask = z_mask;
196
ctx->z_mask = z_mask;
197
+ ctx->s_mask = smask_from_zmask(z_mask);
197
+ ctx->s_mask = smask_from_zmask(z_mask);
198
198
199
return fold_masks(ctx, op);
199
return fold_masks(ctx, op);
200
}
200
}
201
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
201
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
202
202
203
static bool fold_exts(OptContext *ctx, TCGOp *op)
203
static bool fold_exts(OptContext *ctx, TCGOp *op)
204
{
204
{
205
- uint64_t z_mask_old, z_mask, sign;
205
- uint64_t z_mask_old, z_mask, sign;
206
+ uint64_t s_mask_old, s_mask, z_mask, sign;
206
+ uint64_t s_mask_old, s_mask, z_mask, sign;
207
bool type_change = false;
207
bool type_change = false;
208
208
209
if (fold_const1(ctx, op)) {
209
if (fold_const1(ctx, op)) {
210
return true;
210
return true;
211
}
211
}
212
212
213
- z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
213
- z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
214
+ z_mask = arg_info(op->args[1])->z_mask;
214
+ z_mask = arg_info(op->args[1])->z_mask;
215
+ s_mask = arg_info(op->args[1])->s_mask;
215
+ s_mask = arg_info(op->args[1])->s_mask;
216
+ s_mask_old = s_mask;
216
+ s_mask_old = s_mask;
217
217
218
switch (op->opc) {
218
switch (op->opc) {
219
CASE_OP_32_64(ext8s):
219
CASE_OP_32_64(ext8s):
220
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
220
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
221
221
222
if (z_mask & sign) {
222
if (z_mask & sign) {
223
z_mask |= sign;
223
z_mask |= sign;
224
- } else if (!type_change) {
224
- } else if (!type_change) {
225
- ctx->a_mask = z_mask_old ^ z_mask;
225
- ctx->a_mask = z_mask_old ^ z_mask;
226
}
226
}
227
+ s_mask |= sign << 1;
227
+ s_mask |= sign << 1;
228
+
228
+
229
ctx->z_mask = z_mask;
229
ctx->z_mask = z_mask;
230
+ ctx->s_mask = s_mask;
230
+ ctx->s_mask = s_mask;
231
+ if (!type_change) {
231
+ if (!type_change) {
232
+ ctx->a_mask = s_mask & ~s_mask_old;
232
+ ctx->a_mask = s_mask & ~s_mask_old;
233
+ }
233
+ }
234
234
235
return fold_masks(ctx, op);
235
return fold_masks(ctx, op);
236
}
236
}
237
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
237
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
238
}
238
}
239
239
240
ctx->z_mask = z_mask;
240
ctx->z_mask = z_mask;
241
+ ctx->s_mask = smask_from_zmask(z_mask);
241
+ ctx->s_mask = smask_from_zmask(z_mask);
242
if (!type_change) {
242
if (!type_change) {
243
ctx->a_mask = z_mask_old ^ z_mask;
243
ctx->a_mask = z_mask_old ^ z_mask;
244
}
244
}
245
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
245
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
246
MemOp mop = get_memop(oi);
246
MemOp mop = get_memop(oi);
247
int width = 8 * memop_size(mop);
247
int width = 8 * memop_size(mop);
248
248
249
- if (!(mop & MO_SIGN) && width < 64) {
249
- if (!(mop & MO_SIGN) && width < 64) {
250
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
250
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
251
+ if (width < 64) {
251
+ if (width < 64) {
252
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
252
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
253
+ if (!(mop & MO_SIGN)) {
253
+ if (!(mop & MO_SIGN)) {
254
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
254
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
255
+ ctx->s_mask <<= 1;
255
+ ctx->s_mask <<= 1;
256
+ }
256
+ }
257
}
257
}
258
258
259
/* Opcodes that touch guest memory stop the mb optimization. */
259
/* Opcodes that touch guest memory stop the mb optimization. */
260
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
260
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
261
261
262
static bool fold_sextract(OptContext *ctx, TCGOp *op)
262
static bool fold_sextract(OptContext *ctx, TCGOp *op)
263
{
263
{
264
- int64_t z_mask_old, z_mask;
264
- int64_t z_mask_old, z_mask;
265
+ uint64_t z_mask, s_mask, s_mask_old;
265
+ uint64_t z_mask, s_mask, s_mask_old;
266
+ int pos = op->args[2];
266
+ int pos = op->args[2];
267
+ int len = op->args[3];
267
+ int len = op->args[3];
268
268
269
if (arg_is_const(op->args[1])) {
269
if (arg_is_const(op->args[1])) {
270
uint64_t t;
270
uint64_t t;
271
271
272
t = arg_info(op->args[1])->val;
272
t = arg_info(op->args[1])->val;
273
- t = sextract64(t, op->args[2], op->args[3]);
273
- t = sextract64(t, op->args[2], op->args[3]);
274
+ t = sextract64(t, pos, len);
274
+ t = sextract64(t, pos, len);
275
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
275
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
276
}
276
}
277
277
278
- z_mask_old = arg_info(op->args[1])->z_mask;
278
- z_mask_old = arg_info(op->args[1])->z_mask;
279
- z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
279
- z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
280
- if (op->args[2] == 0 && z_mask >= 0) {
280
- if (op->args[2] == 0 && z_mask >= 0) {
281
- ctx->a_mask = z_mask_old ^ z_mask;
281
- ctx->a_mask = z_mask_old ^ z_mask;
282
- }
282
- }
283
+ z_mask = arg_info(op->args[1])->z_mask;
283
+ z_mask = arg_info(op->args[1])->z_mask;
284
+ z_mask = sextract64(z_mask, pos, len);
284
+ z_mask = sextract64(z_mask, pos, len);
285
ctx->z_mask = z_mask;
285
ctx->z_mask = z_mask;
286
286
287
+ s_mask_old = arg_info(op->args[1])->s_mask;
287
+ s_mask_old = arg_info(op->args[1])->s_mask;
288
+ s_mask = sextract64(s_mask_old, pos, len);
288
+ s_mask = sextract64(s_mask_old, pos, len);
289
+ s_mask |= MAKE_64BIT_MASK(len, 64 - len);
289
+ s_mask |= MAKE_64BIT_MASK(len, 64 - len);
290
+ ctx->s_mask = s_mask;
290
+ ctx->s_mask = s_mask;
291
+
291
+
292
+ if (pos == 0) {
292
+ if (pos == 0) {
293
+ ctx->a_mask = s_mask & ~s_mask_old;
293
+ ctx->a_mask = s_mask & ~s_mask_old;
294
+ }
294
+ }
295
+
295
+
296
return fold_masks(ctx, op);
296
return fold_masks(ctx, op);
297
}
297
}
298
298
299
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
299
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
300
{
300
{
301
/* We can't do any folding with a load, but we can record bits. */
301
/* We can't do any folding with a load, but we can record bits. */
302
switch (op->opc) {
302
switch (op->opc) {
303
+ CASE_OP_32_64(ld8s):
303
+ CASE_OP_32_64(ld8s):
304
+ ctx->s_mask = MAKE_64BIT_MASK(8, 56);
304
+ ctx->s_mask = MAKE_64BIT_MASK(8, 56);
305
+ break;
305
+ break;
306
CASE_OP_32_64(ld8u):
306
CASE_OP_32_64(ld8u):
307
ctx->z_mask = MAKE_64BIT_MASK(0, 8);
307
ctx->z_mask = MAKE_64BIT_MASK(0, 8);
308
+ ctx->s_mask = MAKE_64BIT_MASK(9, 55);
308
+ ctx->s_mask = MAKE_64BIT_MASK(9, 55);
309
+ break;
309
+ break;
310
+ CASE_OP_32_64(ld16s):
310
+ CASE_OP_32_64(ld16s):
311
+ ctx->s_mask = MAKE_64BIT_MASK(16, 48);
311
+ ctx->s_mask = MAKE_64BIT_MASK(16, 48);
312
break;
312
break;
313
CASE_OP_32_64(ld16u):
313
CASE_OP_32_64(ld16u):
314
ctx->z_mask = MAKE_64BIT_MASK(0, 16);
314
ctx->z_mask = MAKE_64BIT_MASK(0, 16);
315
+ ctx->s_mask = MAKE_64BIT_MASK(17, 47);
315
+ ctx->s_mask = MAKE_64BIT_MASK(17, 47);
316
+ break;
316
+ break;
317
+ case INDEX_op_ld32s_i64:
317
+ case INDEX_op_ld32s_i64:
318
+ ctx->s_mask = MAKE_64BIT_MASK(32, 32);
318
+ ctx->s_mask = MAKE_64BIT_MASK(32, 32);
319
break;
319
break;
320
case INDEX_op_ld32u_i64:
320
case INDEX_op_ld32u_i64:
321
ctx->z_mask = MAKE_64BIT_MASK(0, 32);
321
ctx->z_mask = MAKE_64BIT_MASK(0, 32);
322
+ ctx->s_mask = MAKE_64BIT_MASK(33, 31);
322
+ ctx->s_mask = MAKE_64BIT_MASK(33, 31);
323
break;
323
break;
324
default:
324
default:
325
g_assert_not_reached();
325
g_assert_not_reached();
326
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
326
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
327
ctx.type = TCG_TYPE_I32;
327
ctx.type = TCG_TYPE_I32;
328
}
328
}
329
329
330
- /* Assume all bits affected, and no bits known zero. */
330
- /* Assume all bits affected, and no bits known zero. */
331
+ /* Assume all bits affected, no bits known zero, no sign reps. */
331
+ /* Assume all bits affected, no bits known zero, no sign reps. */
332
ctx.a_mask = -1;
332
ctx.a_mask = -1;
333
ctx.z_mask = -1;
333
ctx.z_mask = -1;
334
+ ctx.s_mask = 0;
334
+ ctx.s_mask = 0;
335
335
336
/*
336
/*
337
* Process each opcode.
337
* Process each opcode.
338
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
338
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
339
case INDEX_op_extrh_i64_i32:
339
case INDEX_op_extrh_i64_i32:
340
done = fold_extu(&ctx, op);
340
done = fold_extu(&ctx, op);
341
break;
341
break;
342
+ CASE_OP_32_64(ld8s):
342
+ CASE_OP_32_64(ld8s):
343
CASE_OP_32_64(ld8u):
343
CASE_OP_32_64(ld8u):
344
+ CASE_OP_32_64(ld16s):
344
+ CASE_OP_32_64(ld16s):
345
CASE_OP_32_64(ld16u):
345
CASE_OP_32_64(ld16u):
346
+ case INDEX_op_ld32s_i64:
346
+ case INDEX_op_ld32s_i64:
347
case INDEX_op_ld32u_i64:
347
case INDEX_op_ld32u_i64:
348
done = fold_tcg_ld(&ctx, op);
348
done = fold_tcg_ld(&ctx, op);
349
break;
349
break;
350
--
350
--
351
2.25.1
351
2.25.1
352
352
353
353
1
Sign repetitions are perforce all identical, whether they are 1 or 0.
1
Sign repetitions are perforce all identical, whether they are 1 or 0.
2
Bitwise operations preserve the relative quantity of the repetitions.
2
Bitwise operations preserve the relative quantity of the repetitions.
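
A small worked example (values are illustrative only): intersecting the two
left-aligned s_mask runs keeps exactly the weaker guarantee, which is what the
hunks below do for the binary ops.

    uint64_t s1 = 0xffffffff00000000ull;   /* operand 1: >= 32 sign-bit copies */
    uint64_t s2 = 0xffff000000000000ull;   /* operand 2: >= 16 sign-bit copies */
    uint64_t s_res = s1 & s2;              /* 0xffff000000000000: >= 16 copies */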
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
tcg/optimize.c | 29 +++++++++++++++++++++++++++++
9
tcg/optimize.c | 29 +++++++++++++++++++++++++++++
10
1 file changed, 29 insertions(+)
10
1 file changed, 29 insertions(+)
11
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
17
z2 = arg_info(op->args[2])->z_mask;
17
z2 = arg_info(op->args[2])->z_mask;
18
ctx->z_mask = z1 & z2;
18
ctx->z_mask = z1 & z2;
19
19
20
+ /*
20
+ /*
21
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
21
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
22
+ * Bitwise operations preserve the relative quantity of the repetitions.
22
+ * Bitwise operations preserve the relative quantity of the repetitions.
23
+ */
23
+ */
24
+ ctx->s_mask = arg_info(op->args[1])->s_mask
24
+ ctx->s_mask = arg_info(op->args[1])->s_mask
25
+ & arg_info(op->args[2])->s_mask;
25
+ & arg_info(op->args[2])->s_mask;
26
+
26
+
27
/*
27
/*
28
* Known-zeros does not imply known-ones. Therefore unless
28
* Known-zeros does not imply known-ones. Therefore unless
29
* arg2 is constant, we can't infer affected bits from it.
29
* arg2 is constant, we can't infer affected bits from it.
30
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
30
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
31
}
31
}
32
ctx->z_mask = z1;
32
ctx->z_mask = z1;
33
33
34
+ ctx->s_mask = arg_info(op->args[1])->s_mask
34
+ ctx->s_mask = arg_info(op->args[1])->s_mask
35
+ & arg_info(op->args[2])->s_mask;
35
+ & arg_info(op->args[2])->s_mask;
36
return fold_masks(ctx, op);
36
return fold_masks(ctx, op);
37
}
37
}
38
38
39
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
39
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
40
fold_xi_to_not(ctx, op, 0)) {
40
fold_xi_to_not(ctx, op, 0)) {
41
return true;
41
return true;
42
}
42
}
43
+
43
+
44
+ ctx->s_mask = arg_info(op->args[1])->s_mask
44
+ ctx->s_mask = arg_info(op->args[1])->s_mask
45
+ & arg_info(op->args[2])->s_mask;
45
+ & arg_info(op->args[2])->s_mask;
46
return false;
46
return false;
47
}
47
}
48
48
49
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
49
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
50
50
51
ctx->z_mask = arg_info(op->args[3])->z_mask
51
ctx->z_mask = arg_info(op->args[3])->z_mask
52
| arg_info(op->args[4])->z_mask;
52
| arg_info(op->args[4])->z_mask;
53
+ ctx->s_mask = arg_info(op->args[3])->s_mask
53
+ ctx->s_mask = arg_info(op->args[3])->s_mask
54
+ & arg_info(op->args[4])->s_mask;
54
+ & arg_info(op->args[4])->s_mask;
55
55
56
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
56
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
57
uint64_t tv = arg_info(op->args[3])->val;
57
uint64_t tv = arg_info(op->args[3])->val;
58
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
58
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
59
fold_xi_to_not(ctx, op, -1)) {
59
fold_xi_to_not(ctx, op, -1)) {
60
return true;
60
return true;
61
}
61
}
62
+
62
+
63
+ ctx->s_mask = arg_info(op->args[1])->s_mask
63
+ ctx->s_mask = arg_info(op->args[1])->s_mask
64
+ & arg_info(op->args[2])->s_mask;
64
+ & arg_info(op->args[2])->s_mask;
65
return false;
65
return false;
66
}
66
}
67
67
68
@@ -XXX,XX +XXX,XX @@ static bool fold_nor(OptContext *ctx, TCGOp *op)
68
@@ -XXX,XX +XXX,XX @@ static bool fold_nor(OptContext *ctx, TCGOp *op)
69
fold_xi_to_not(ctx, op, 0)) {
69
fold_xi_to_not(ctx, op, 0)) {
70
return true;
70
return true;
71
}
71
}
72
+
72
+
73
+ ctx->s_mask = arg_info(op->args[1])->s_mask
73
+ ctx->s_mask = arg_info(op->args[1])->s_mask
74
+ & arg_info(op->args[2])->s_mask;
74
+ & arg_info(op->args[2])->s_mask;
75
return false;
75
return false;
76
}
76
}
77
77
78
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
78
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
79
return true;
79
return true;
80
}
80
}
81
81
82
+ ctx->s_mask = arg_info(op->args[1])->s_mask;
82
+ ctx->s_mask = arg_info(op->args[1])->s_mask;
83
+
83
+
84
/* Because of fold_to_not, we want to always return true, via finish. */
84
/* Because of fold_to_not, we want to always return true, via finish. */
85
finish_folding(ctx, op);
85
finish_folding(ctx, op);
86
return true;
86
return true;
87
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
87
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
88
88
89
ctx->z_mask = arg_info(op->args[1])->z_mask
89
ctx->z_mask = arg_info(op->args[1])->z_mask
90
| arg_info(op->args[2])->z_mask;
90
| arg_info(op->args[2])->z_mask;
91
+ ctx->s_mask = arg_info(op->args[1])->s_mask
91
+ ctx->s_mask = arg_info(op->args[1])->s_mask
92
+ & arg_info(op->args[2])->s_mask;
92
+ & arg_info(op->args[2])->s_mask;
93
return fold_masks(ctx, op);
93
return fold_masks(ctx, op);
94
}
94
}
95
95
96
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
96
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
97
fold_ix_to_not(ctx, op, 0)) {
97
fold_ix_to_not(ctx, op, 0)) {
98
return true;
98
return true;
99
}
99
}
100
+
100
+
101
+ ctx->s_mask = arg_info(op->args[1])->s_mask
101
+ ctx->s_mask = arg_info(op->args[1])->s_mask
102
+ & arg_info(op->args[2])->s_mask;
102
+ & arg_info(op->args[2])->s_mask;
103
return false;
103
return false;
104
}
104
}
105
105
106
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
106
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
107
107
108
ctx->z_mask = arg_info(op->args[1])->z_mask
108
ctx->z_mask = arg_info(op->args[1])->z_mask
109
| arg_info(op->args[2])->z_mask;
109
| arg_info(op->args[2])->z_mask;
110
+ ctx->s_mask = arg_info(op->args[1])->s_mask
110
+ ctx->s_mask = arg_info(op->args[1])->s_mask
111
+ & arg_info(op->args[2])->s_mask;
111
+ & arg_info(op->args[2])->s_mask;
112
return fold_masks(ctx, op);
112
return fold_masks(ctx, op);
113
}
113
}
114
114
115
--
115
--
116
2.25.1
116
2.25.1
117
117
118
118
1
The result is either 0 or 1, which means that we have
1
The result is either 0 or 1, which means that we have
2
a 2 bit signed result, and thus 62 bits of sign.
2
a 2 bit signed result, and thus 62 bits of sign.
3
For clarity, use the smask_from_zmask function.
3
For clarity, use the smask_from_zmask function.
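
Worked out for illustration, using the helper introduced earlier in the series:

    /* smask_from_zmask(1), step by step:                          */
    /*   clz64(1) = 63        -- 63 known-zero bits on the left    */
    /*   rep      = 63 - 1    -- treat it as a 2-bit signed field  */
    /*   result   = ~(~0ull >> 62) = 0xfffffffffffffffc            */
    ctx->s_mask = smask_from_zmask(1);     /* 62 guaranteed sign bits */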
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
tcg/optimize.c | 2 ++
9
tcg/optimize.c | 2 ++
10
1 file changed, 2 insertions(+)
10
1 file changed, 2 insertions(+)
11
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
17
}
17
}
18
18
19
ctx->z_mask = 1;
19
ctx->z_mask = 1;
20
+ ctx->s_mask = smask_from_zmask(1);
20
+ ctx->s_mask = smask_from_zmask(1);
21
return false;
21
return false;
22
}
22
}
23
23
24
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
24
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
25
}
25
}
26
26
27
ctx->z_mask = 1;
27
ctx->z_mask = 1;
28
+ ctx->s_mask = smask_from_zmask(1);
28
+ ctx->s_mask = smask_from_zmask(1);
29
return false;
29
return false;
30
30
31
do_setcond_const:
31
do_setcond_const:
32
--
32
--
33
2.25.1
33
2.25.1
34
34
35
35
1
The results are generally 6 bit unsigned values, though
1
The results are generally 6 bit unsigned values, though
2
the count-leading and count-trailing zeros ops may produce any value
2
the count-leading and count-trailing zeros ops may produce any value
3
for a zero input.
3
for a zero input.
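
For instance (the numbers are only an illustration): a 64-bit popcount result
is at most 64, so only the low seven bits can ever be set, and the derived
s_mask guarantees 56 sign repetitions.

    /* z_mask = 0x7f covers a ctpop_i64 result (values 0..64).      */
    /* smask_from_zmask(0x7f): clz64(0x7f) = 57, rep = 56,          */
    /* so s_mask = ~(~0ull >> 56) = 0xffffffffffffff00.             */
    ctx->s_mask = smask_from_zmask(ctx->z_mask);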
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
tcg/optimize.c | 3 ++-
9
tcg/optimize.c | 3 ++-
10
1 file changed, 2 insertions(+), 1 deletion(-)
10
1 file changed, 2 insertions(+), 1 deletion(-)
11
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
17
g_assert_not_reached();
17
g_assert_not_reached();
18
}
18
}
19
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
19
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
20
-
20
-
21
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
21
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
22
return false;
22
return false;
23
}
23
}
24
24
25
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
25
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
26
default:
26
default:
27
g_assert_not_reached();
27
g_assert_not_reached();
28
}
28
}
29
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
29
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
30
return false;
30
return false;
31
}
31
}
32
32
33
--
33
--
34
2.25.1
34
2.25.1
35
35
36
36
1
For constant shifts, we can simply shift the s_mask.
1
For constant shifts, we can simply shift the s_mask.
2
2
3
For variable shifts, we know that sar does not reduce
3
For variable shifts, we know that sar does not reduce
4
the s_mask, which helps for sequences like
4
the s_mask, which helps for sequences like
5
5
6
ext32s_i64 t, in
6
ext32s_i64 t, in
7
sar_i64 t, t, v
7
sar_i64 t, t, v
8
ext32s_i64 out, t
8
ext32s_i64 out, t
9
9
10
allowing the final extend to be eliminated.
10
allowing the final extend to be eliminated.
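
A sketch of the constant-shift case (the input mask is only an example): the
s_mask is shifted by the same constant and then re-normalized to a left-aligned
run by the new smask_from_smask() helper.

    uint64_t s_mask = 0xffffffff00000000ull;     /* >= 32 sign-bit copies      */
    s_mask = (uint64_t)((int64_t)s_mask >> 8);   /* sar 8: 0xffffffffff000000  */
    /* smask_from_smask() then keeps a conservative left-aligned run of 1s. */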
11
11
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index XXXXXXX..XXXXXXX 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -XXX,XX +XXX,XX @@ static uint64_t smask_from_zmask(uint64_t zmask)
     return ~(~0ull >> rep);
 }
 
+/*
+ * Recreate a properly left-aligned smask after manipulation.
+ * Some bit-shuffling, particularly shifts and rotates, may
+ * retain sign bits on the left, but may scatter disconnected
+ * sign bits on the right. Retain only what remains to the left.
+ */
+static uint64_t smask_from_smask(int64_t smask)
+{
+    /* Only the 1 bits are significant for smask */
+    return smask_from_zmask(~smask);
+}
+
 static inline TempOptInfo *ts_info(TCGTemp *ts)
 {
     return ts->state_ptr;
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
 
 static bool fold_shift(OptContext *ctx, TCGOp *op)
 {
+    uint64_t s_mask, z_mask, sign;
+
     if (fold_const2(ctx, op) ||
         fold_ix_to_i(ctx, op, 0) ||
         fold_xi_to_x(ctx, op, 0)) {
         return true;
     }
 
+    s_mask = arg_info(op->args[1])->s_mask;
+    z_mask = arg_info(op->args[1])->z_mask;
+
     if (arg_is_const(op->args[2])) {
-        ctx->z_mask = do_constant_folding(op->opc, ctx->type,
-                                          arg_info(op->args[1])->z_mask,
-                                          arg_info(op->args[2])->val);
+        int sh = arg_info(op->args[2])->val;
+
+        ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
+
+        s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
+        ctx->s_mask = smask_from_smask(s_mask);
+
         return fold_masks(ctx, op);
     }
+
+    switch (op->opc) {
+    CASE_OP_32_64(sar):
+        /*
+         * Arithmetic right shift will not reduce the number of
+         * input sign repetitions.
+         */
+        ctx->s_mask = s_mask;
+        break;
+    CASE_OP_32_64(shr):
+        /*
+         * If the sign bit is known zero, then logical right shift
+         * will not reduce the number of input sign repetitions.
+         */
+        sign = (s_mask & -s_mask) >> 1;
+        if (!(z_mask & sign)) {
+            ctx->s_mask = s_mask;
+        }
+        break;
+    default:
+        break;
+    }
+
     return false;
 }
 
--
2.25.1

From: Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru>

Watchpoint processing code restores the vCPU state twice:
in tb_check_watchpoint and in cpu_loop_exit_restore/cpu_restore_state.
Normally this does not affect anything, but in icount mode the
instruction counter is incremented twice and becomes incorrect.
This patch eliminates the unneeded CPU state restore.

Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <163542168516.2127597.8781375223437124644.stgit@pasha-ThinkPad-X280>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 softmmu/physmem.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index XXXXXXX..XXXXXXX 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -XXX,XX +XXX,XX @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
                 cpu->watchpoint_hit = wp;
 
                 mmap_lock();
+                /* This call also restores vCPU state */
                 tb_check_watchpoint(cpu, ra);
                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                     cpu->exception_index = EXCP_DEBUG;
                     mmap_unlock();
-                    cpu_loop_exit_restore(cpu, ra);
+                    cpu_loop_exit(cpu);
                 } else {
                     /* Force execution of one insn next time. */
                     cpu->cflags_next_tb = 1 | curr_cflags(cpu);
                     mmap_unlock();
-                    if (ra) {
-                        cpu_restore_state(cpu, ra, true);
-                    }
                     cpu_loop_exit_noexc(cpu);
                 }
             }
--
2.25.1

From: Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru>

The cpu_check_watchpoint function checks cpu->watchpoint_hit on entry.
But then it performs the same check again in the middle of the function,
although this field cannot have changed in between.
This patch therefore removes the useless condition.

Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <163542169094.2127597.8801843697434113110.stgit@pasha-ThinkPad-X280>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 softmmu/physmem.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index XXXXXXX..XXXXXXX 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -XXX,XX +XXX,XX @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
             }
             wp->hitaddr = MAX(addr, wp->vaddr);
             wp->hitattrs = attrs;
-            if (!cpu->watchpoint_hit) {
-                if (wp->flags & BP_CPU && cc->tcg_ops->debug_check_watchpoint &&
-                    !cc->tcg_ops->debug_check_watchpoint(cpu, wp)) {
-                    wp->flags &= ~BP_WATCHPOINT_HIT;
-                    continue;
-                }
-                cpu->watchpoint_hit = wp;
 
-                mmap_lock();
-                /* This call also restores vCPU state */
-                tb_check_watchpoint(cpu, ra);
-                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
-                    cpu->exception_index = EXCP_DEBUG;
-                    mmap_unlock();
-                    cpu_loop_exit(cpu);
-                } else {
-                    /* Force execution of one insn next time. */
-                    cpu->cflags_next_tb = 1 | curr_cflags(cpu);
-                    mmap_unlock();
-                    cpu_loop_exit_noexc(cpu);
-                }
+            if (wp->flags & BP_CPU && cc->tcg_ops->debug_check_watchpoint &&
+                !cc->tcg_ops->debug_check_watchpoint(cpu, wp)) {
+                wp->flags &= ~BP_WATCHPOINT_HIT;
+                continue;
+            }
+            cpu->watchpoint_hit = wp;
+
+            mmap_lock();
+            /* This call also restores vCPU state */
+            tb_check_watchpoint(cpu, ra);
+            if (wp->flags & BP_STOP_BEFORE_ACCESS) {
+                cpu->exception_index = EXCP_DEBUG;
+                mmap_unlock();
+                cpu_loop_exit(cpu);
+            } else {
+                /* Force execution of one insn next time. */
+                cpu->cflags_next_tb = 1 | curr_cflags(cpu);
+                mmap_unlock();
+                cpu_loop_exit_noexc(cpu);
             }
         } else {
             wp->flags &= ~BP_WATCHPOINT_HIT;
--
2.25.1

From: Pavel Dovgalyuk <pavel.dovgalyuk@ispras.ru>

Watchpoints that should fire after the memory access
break the execution of the current block and retranslate
the current instruction into a separate block, which then
raises a debug interrupt.
But cpu_interrupt can't be called in such a block when
icount is enabled, because interrupts must be allowed
explicitly.
This patch sets the CF_LAST_IO flag for the retranslated
block, allowing an interrupt request for the last instruction.

Signed-off-by: Pavel Dovgalyuk <Pavel.Dovgalyuk@ispras.ru>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <163542169727.2127597.8141772572696627329.stgit@pasha-ThinkPad-X280>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 softmmu/physmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index XXXXXXX..XXXXXXX 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -XXX,XX +XXX,XX @@ void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
                 cpu_loop_exit(cpu);
             } else {
                 /* Force execution of one insn next time. */
-                cpu->cflags_next_tb = 1 | curr_cflags(cpu);
+                cpu->cflags_next_tb = 1 | CF_LAST_IO | curr_cflags(cpu);
                 mmap_unlock();
                 cpu_loop_exit_noexc(cpu);
             }
--
2.25.1
