The following changes since commit c52d69e7dbaaed0ffdef8125e79218672c30161d:

  Merge remote-tracking branch 'remotes/cschoenebeck/tags/pull-9p-20211027' into staging (2021-10-27 11:45:18 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20211027

for you to fetch changes up to 820c025f0dcacf2f3c12735b1f162893fbfa7bc6:

  tcg/optimize: Propagate sign info for shifting (2021-10-27 17:11:23 -0700)

----------------------------------------------------------------
Improvements to qemu/int128
Fixes for 128/64 division.
Cleanup tcg/optimize.c
Optimize redundant sign extensions

----------------------------------------------------------------
Frédéric Pétrot (1):
      qemu/int128: Add int128_{not,xor}

Luis Pires (4):
      host-utils: move checks out of divu128/divs128
      host-utils: move udiv_qrnnd() to host-utils
      host-utils: add 128-bit quotient support to divu128/divs128
      host-utils: add unit tests for divu128/divs128

Richard Henderson (51):
      tcg/optimize: Rename "mask" to "z_mask"
      tcg/optimize: Split out OptContext
      tcg/optimize: Remove do_default label
      tcg/optimize: Change tcg_opt_gen_{mov,movi} interface
      tcg/optimize: Move prev_mb into OptContext
      tcg/optimize: Split out init_arguments
      tcg/optimize: Split out copy_propagate
      tcg/optimize: Split out fold_call
      tcg/optimize: Drop nb_oargs, nb_iargs locals
      tcg/optimize: Change fail return for do_constant_folding_cond*
      tcg/optimize: Return true from tcg_opt_gen_{mov,movi}
      tcg/optimize: Split out finish_folding
      tcg/optimize: Use a boolean to avoid a mass of continues
      tcg/optimize: Split out fold_mb, fold_qemu_{ld,st}
      tcg/optimize: Split out fold_const{1,2}
      tcg/optimize: Split out fold_setcond2
      tcg/optimize: Split out fold_brcond2
      tcg/optimize: Split out fold_brcond
      tcg/optimize: Split out fold_setcond
      tcg/optimize: Split out fold_mulu2_i32
      tcg/optimize: Split out fold_addsub2_i32
      tcg/optimize: Split out fold_movcond
      tcg/optimize: Split out fold_extract2
      tcg/optimize: Split out fold_extract, fold_sextract
      tcg/optimize: Split out fold_deposit
      tcg/optimize: Split out fold_count_zeros
      tcg/optimize: Split out fold_bswap
      tcg/optimize: Split out fold_dup, fold_dup2
      tcg/optimize: Split out fold_mov
      tcg/optimize: Split out fold_xx_to_i
      tcg/optimize: Split out fold_xx_to_x
      tcg/optimize: Split out fold_xi_to_i
      tcg/optimize: Add type to OptContext
      tcg/optimize: Split out fold_to_not
      tcg/optimize: Split out fold_sub_to_neg
      tcg/optimize: Split out fold_xi_to_x
      tcg/optimize: Split out fold_ix_to_i
      tcg/optimize: Split out fold_masks
      tcg/optimize: Expand fold_mulu2_i32 to all 4-arg multiplies
      tcg/optimize: Expand fold_addsub2_i32 to 64-bit ops
      tcg/optimize: Sink commutative operand swapping into fold functions
      tcg/optimize: Stop forcing z_mask to "garbage" for 32-bit values
      tcg/optimize: Use fold_xx_to_i for orc
      tcg/optimize: Use fold_xi_to_x for mul
      tcg/optimize: Use fold_xi_to_x for div
      tcg/optimize: Use fold_xx_to_i for rem
      tcg/optimize: Optimize sign extensions
      tcg/optimize: Propagate sign info for logical operations
      tcg/optimize: Propagate sign info for setcond
      tcg/optimize: Propagate sign info for bit counting
      tcg/optimize: Propagate sign info for shifting

 include/fpu/softfloat-macros.h | 82 --
 include/hw/clock.h | 5 +-
 include/qemu/host-utils.h | 121 +-
 include/qemu/int128.h | 20 +
 target/ppc/int_helper.c | 23 +-
 tcg/optimize.c | 2644 ++++++++++++++++++++++++----------------
 tests/unit/test-div128.c | 197 +++
 util/host-utils.c | 147 ++-
 tests/unit/meson.build | 1 +
 9 files changed, 2053 insertions(+), 1187 deletions(-)
 create mode 100644 tests/unit/test-div128.c

The following changes since commit d530697ca20e19f7a626f4c1c8b26fccd0dc4470:

  Merge tag 'pull-testing-updates-100523-1' of https://gitlab.com/stsquad/qemu into staging (2023-05-10 16:43:01 +0100)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20230511

for you to fetch changes up to b2d4d6616c22325dff802e0a35092167f2dc2268:

  target/loongarch: Do not include tcg-ldst.h (2023-05-11 06:06:04 +0100)

----------------------------------------------------------------
target/m68k: Fix gen_load_fp regression
accel/tcg: Ensure fairness with icount
disas: Move disas.c into the target-independent source sets
tcg: Use common routines for calling slow path helpers
tcg/*: Cleanups to qemu_ld/st constraints
tcg: Remove TARGET_ALIGNED_ONLY
accel/tcg: Reorg system mode load/store helpers

----------------------------------------------------------------
Jamie Iles (2):
      cpu: expose qemu_cpu_list_lock for lock-guard use
      accel/tcg/tcg-accel-ops-rr: ensure fairness with icount

Richard Henderson (49):
      target/m68k: Fix gen_load_fp for OS_LONG
      accel/tcg: Fix atomic_mmu_lookup for reads
      disas: Fix tabs and braces in disas.c
      disas: Move disas.c to disas/
      disas: Remove target_ulong from the interface
      disas: Remove target-specific headers
      tcg/i386: Introduce prepare_host_addr
      tcg/i386: Use indexed addressing for softmmu fast path
      tcg/aarch64: Introduce prepare_host_addr
      tcg/arm: Introduce prepare_host_addr
      tcg/loongarch64: Introduce prepare_host_addr
      tcg/mips: Introduce prepare_host_addr
      tcg/ppc: Introduce prepare_host_addr
      tcg/riscv: Introduce prepare_host_addr
      tcg/s390x: Introduce prepare_host_addr
      tcg: Add routines for calling slow-path helpers
      tcg/i386: Convert tcg_out_qemu_ld_slow_path
      tcg/i386: Convert tcg_out_qemu_st_slow_path
      tcg/aarch64: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/arm: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/loongarch64: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/mips: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/ppc: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/riscv: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/s390x: Convert tcg_out_qemu_{ld,st}_slow_path
      tcg/loongarch64: Simplify constraints on qemu_ld/st
      tcg/mips: Remove MO_BSWAP handling
      tcg/mips: Reorg tlb load within prepare_host_addr
      tcg/mips: Simplify constraints on qemu_ld/st
      tcg/ppc: Reorg tcg_out_tlb_read
      tcg/ppc: Adjust constraints on qemu_ld/st
      tcg/ppc: Remove unused constraints A, B, C, D
      tcg/ppc: Remove unused constraint J
      tcg/riscv: Simplify constraints on qemu_ld/st
      tcg/s390x: Use ALGFR in constructing softmmu host address
      tcg/s390x: Simplify constraints on qemu_ld/st
      target/mips: Add MO_ALIGN to gen_llwp, gen_scwp
      target/mips: Add missing default_tcg_memop_mask
      target/mips: Use MO_ALIGN instead of 0
      target/mips: Remove TARGET_ALIGNED_ONLY
      target/nios2: Remove TARGET_ALIGNED_ONLY
      target/sh4: Use MO_ALIGN where required
      target/sh4: Remove TARGET_ALIGNED_ONLY
      tcg: Remove TARGET_ALIGNED_ONLY
      accel/tcg: Add cpu_in_serial_context
      accel/tcg: Introduce tlb_read_idx
      accel/tcg: Reorg system mode load helpers
      accel/tcg: Reorg system mode store helpers
      target/loongarch: Do not include tcg-ldst.h

Thomas Huth (2):
      disas: Move softmmu specific code to separate file
      disas: Move disas.c into the target-independent source set

 configs/targets/mips-linux-user.mak | 1 -
 configs/targets/mips-softmmu.mak | 1 -
 configs/targets/mips64-linux-user.mak | 1 -
 configs/targets/mips64-softmmu.mak | 1 -
 configs/targets/mips64el-linux-user.mak | 1 -
 configs/targets/mips64el-softmmu.mak | 1 -
 configs/targets/mipsel-linux-user.mak | 1 -
 configs/targets/mipsel-softmmu.mak | 1 -
 configs/targets/mipsn32-linux-user.mak | 1 -
 configs/targets/mipsn32el-linux-user.mak | 1 -
 configs/targets/nios2-softmmu.mak | 1 -
 configs/targets/sh4-linux-user.mak | 1 -
 configs/targets/sh4-softmmu.mak | 1 -
 configs/targets/sh4eb-linux-user.mak | 1 -
 configs/targets/sh4eb-softmmu.mak | 1 -
 meson.build | 3 -
 accel/tcg/internal.h | 9 +
 accel/tcg/tcg-accel-ops-icount.h | 3 +-
 disas/disas-internal.h | 21 +
 include/disas/disas.h | 23 +-
 include/exec/cpu-common.h | 1 +
 include/exec/cpu-defs.h | 7 +-
 include/exec/cpu_ldst.h | 26 +-
 include/exec/memop.h | 13 +-
 include/exec/poison.h | 1 -
 tcg/loongarch64/tcg-target-con-set.h | 2 -
 tcg/loongarch64/tcg-target-con-str.h | 1 -
 tcg/mips/tcg-target-con-set.h | 13 +-
 tcg/mips/tcg-target-con-str.h | 2 -
 tcg/mips/tcg-target.h | 4 +-
 tcg/ppc/tcg-target-con-set.h | 11 +-
 tcg/ppc/tcg-target-con-str.h | 7 -
 tcg/riscv/tcg-target-con-set.h | 2 -
 tcg/riscv/tcg-target-con-str.h | 1 -
 tcg/s390x/tcg-target-con-set.h | 2 -
 tcg/s390x/tcg-target-con-str.h | 1 -
 accel/tcg/cpu-exec-common.c | 3 +
 accel/tcg/cputlb.c | 1113 ++++++++++++++++-------------
 accel/tcg/tb-maint.c | 2 +-
 accel/tcg/tcg-accel-ops-icount.c | 21 +-
 accel/tcg/tcg-accel-ops-rr.c | 37 +-
 bsd-user/elfload.c | 5 +-
 cpus-common.c | 2 +-
 disas/disas-mon.c | 65 ++
 disas.c => disas/disas.c | 109 +--
 linux-user/elfload.c | 18 +-
 migration/dirtyrate.c | 26 +-
 replay/replay.c | 3 +-
 target/loongarch/csr_helper.c | 1 -
 target/loongarch/iocsr_helper.c | 1 -
 target/m68k/translate.c | 1 +
 target/mips/tcg/mxu_translate.c | 3 +-
 target/nios2/translate.c | 10 +
 target/sh4/translate.c | 102 ++-
 tcg/tcg.c | 480 ++++++++++++-
 trace/control-target.c | 9 +-
 target/mips/tcg/micromips_translate.c.inc | 24 +-
 target/mips/tcg/mips16e_translate.c.inc | 18 +-
 target/mips/tcg/nanomips_translate.c.inc | 32 +-
 tcg/aarch64/tcg-target.c.inc | 347 ++++-----
 tcg/arm/tcg-target.c.inc | 455 +++++-------
 tcg/i386/tcg-target.c.inc | 453 +++++-------
 tcg/loongarch64/tcg-target.c.inc | 313 +++-----
 tcg/mips/tcg-target.c.inc | 870 +++++++---------------
 tcg/ppc/tcg-target.c.inc | 512 ++++++-------
 tcg/riscv/tcg-target.c.inc | 304 ++++----
 tcg/s390x/tcg-target.c.inc | 314 ++++----
 disas/meson.build | 6 +-
 68 files changed, 2788 insertions(+), 3039 deletions(-)
 create mode 100644 disas/disas-internal.h
 create mode 100644 disas/disas-mon.c
 rename disas.c => disas/disas.c (79%)
1
Recognize the constant function for remainder.
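As background for the fold: whenever the operation is defined, x % x == 0, so a remainder whose two inputs are known to be the same temp can be replaced by the constant 0 without knowing the value. A minimal stand-alone sketch of that check, with made-up types for illustration only (the QEMU change itself routes this through fold_xx_to_i, as the hunk below shows):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical IR operation: dest = arg1 % arg2. */
    typedef struct {
        int dest, arg1, arg2;           /* register numbers */
    } RemOp;

    /*
     * If both inputs are the same register, the remainder is always 0,
     * so the op can be replaced by "load constant 0 into dest".
     */
    static bool fold_rem_xx_to_zero(const RemOp *op, int64_t *folded)
    {
        if (op->arg1 == op->arg2) {
            *folded = 0;
            return true;
        }
        return false;
    }

    int main(void)
    {
        RemOp op = { .dest = 0, .arg1 = 5, .arg2 = 5 };
        int64_t val;
        assert(fold_rem_xx_to_zero(&op, &val) && val == 0);
        return 0;
    }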
1
Case was accidentally dropped in b7a94da9550b.
2
2
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
3
Tested-by: Laurent Vivier <laurent@vivier.eu>
4
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Reviewed-by: Laurent Vivier <laurent@vivier.eu>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
tcg/optimize.c | 6 +++++-
8
target/m68k/translate.c | 1 +
8
1 file changed, 5 insertions(+), 1 deletion(-)
9
1 file changed, 1 insertion(+)
9
10
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
--- a/target/m68k/translate.c
13
+++ b/tcg/optimize.c
14
+++ b/target/m68k/translate.c
14
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static void gen_load_fp(DisasContext *s, int opsize, TCGv addr, TCGv_ptr fp,
15
16
switch (opsize) {
16
static bool fold_remainder(OptContext *ctx, TCGOp *op)
17
case OS_BYTE:
17
{
18
case OS_WORD:
18
- return fold_const2(ctx, op);
19
+ case OS_LONG:
19
+ if (fold_const2(ctx, op) ||
20
tcg_gen_qemu_ld_tl(tmp, addr, index, opsize | MO_SIGN | MO_TE);
20
+ fold_xx_to_i(ctx, op, 0)) {
21
gen_helper_exts32(cpu_env, fp, tmp);
21
+ return true;
22
break;
22
+ }
23
+ return false;
24
}
25
26
static bool fold_setcond(OptContext *ctx, TCGOp *op)
27
--
23
--
28
2.25.1
24
2.34.1
29
25
30
26
1
Recognize the identity function for division.
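The identity used here is x / 1 == x, so a division whose divisor is the known constant 1 degenerates into a copy of the dividend; that is what routing division through fold_xi_to_x with immediate 1 expresses. A tiny illustration, independent of the tcg/optimize code:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Does "x / divisor" fold to a plain copy of x? */
    static bool div_folds_to_copy(int64_t divisor)
    {
        return divisor == 1;            /* x / 1 == x for every x */
    }

    int main(void)
    {
        for (int64_t x = -3; x <= 3; x++) {
            assert(x / 1 == x);         /* the identity being exploited */
        }
        assert(div_folds_to_copy(1));
        assert(!div_folds_to_copy(2));
        return 0;
    }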
1
A copy-paste bug had us looking at the victim cache for writes.
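In outline, the bug class is a lookup helper that must select its comparator by access type but was copied from the write path. A neutral sketch of the intended behaviour, using toy structures rather than QEMU's real TLB types:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    typedef enum { ACCESS_READ, ACCESS_WRITE } AccessType;

    /* Toy TLB entry with separate comparators for reads and writes. */
    typedef struct {
        uint64_t addr_read;
        uint64_t addr_write;
    } TlbEntry;

    /* The comparator consulted must match the access being performed. */
    static bool tlb_hit_for(const TlbEntry *e, uint64_t addr, AccessType type)
    {
        uint64_t cmp = (type == ACCESS_READ) ? e->addr_read : e->addr_write;
        return cmp == addr;
    }

    int main(void)
    {
        TlbEntry e = { .addr_read = 0x1000, .addr_write = UINT64_MAX };
        /* A read of 0x1000 must hit even though the write comparator differs. */
        assert(tlb_hit_for(&e, 0x1000, ACCESS_READ));
        assert(!tlb_hit_for(&e, 0x1000, ACCESS_WRITE));
        return 0;
    }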
2
2
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
3
Cc: qemu-stable@nongnu.org
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reported-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Fixes: 08dff435e2 ("tcg: Probe the proper permissions for atomic ops")
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
9
Message-Id: <20230505204049.352469-1-richard.henderson@linaro.org>
7
---
10
---
8
tcg/optimize.c | 6 +++++-
11
accel/tcg/cputlb.c | 2 +-
9
1 file changed, 5 insertions(+), 1 deletion(-)
12
1 file changed, 1 insertion(+), 1 deletion(-)
10
13
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
16
--- a/accel/tcg/cputlb.c
14
+++ b/tcg/optimize.c
17
+++ b/accel/tcg/cputlb.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
18
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
16
19
} else /* if (prot & PAGE_READ) */ {
17
static bool fold_divide(OptContext *ctx, TCGOp *op)
20
tlb_addr = tlbe->addr_read;
18
{
21
if (!tlb_hit(tlb_addr, addr)) {
19
- return fold_const2(ctx, op);
22
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
20
+ if (fold_const2(ctx, op) ||
23
+ if (!VICTIM_TLB_HIT(addr_read, addr)) {
21
+ fold_xi_to_x(ctx, op, 1)) {
24
tlb_fill(env_cpu(env), addr, size,
22
+ return true;
25
MMU_DATA_LOAD, mmu_idx, retaddr);
23
+ }
26
index = tlb_index(env, mmu_idx, addr);
24
+ return false;
25
}
26
27
static bool fold_dup(OptContext *ctx, TCGOp *op)
28
--
27
--
29
2.25.1
28
2.34.1
30
29
31
30
1
Reduce some code duplication by folding the NE and EQ cases.
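The simplifications rely on how a double-word comparison decomposes into its 32-bit halves: LT/GE against zero depends only on the sign of the high word, and EQ/NE can be settled as soon as one half-comparison is known. A small self-contained model of that arithmetic (not the brcond2 code itself):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Compare a 64-bit value given only as (low, high) 32-bit halves. */

    static bool lt_zero_64(uint32_t lo, uint32_t hi)
    {
        (void)lo;                   /* x < 0 depends only on the sign bit, */
        return (int32_t)hi < 0;     /* which lives in the high half        */
    }

    static bool eq_64(uint32_t alo, uint32_t ahi, uint32_t blo, uint32_t bhi)
    {
        /* Equal iff both halves are equal; one unequal half decides early. */
        return alo == blo && ahi == bhi;
    }

    int main(void)
    {
        int64_t x = -5;
        assert(lt_zero_64((uint32_t)x, (uint32_t)((uint64_t)x >> 32)) == (x < 0));
        assert(eq_64(1, 2, 1, 2) && !eq_64(1, 2, 1, 3));
        return 0;
    }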
1
Fix these before moving the file, for checkpatch.pl.
2
2
3
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-Id: <20230510170812.663149-1-richard.henderson@linaro.org>
5
---
6
---
6
tcg/optimize.c | 159 +++++++++++++++++++++++++------------------------
7
disas.c | 11 ++++++-----
7
1 file changed, 81 insertions(+), 78 deletions(-)
8
1 file changed, 6 insertions(+), 5 deletions(-)
8
9
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/disas.c b/disas.c
10
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
--- a/disas.c
12
+++ b/tcg/optimize.c
13
+++ b/disas.c
13
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
14
return fold_const2(ctx, op);
15
}
15
}
16
16
17
for (pc = code; size > 0; pc += count, size -= count) {
17
+static bool fold_brcond2(OptContext *ctx, TCGOp *op)
18
-    fprintf(out, "0x" TARGET_FMT_lx ": ", pc);
18
+{
19
-    count = s.info.print_insn(pc, &s.info);
19
+ TCGCond cond = op->args[4];
20
-    fprintf(out, "\n");
20
+ int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
21
-    if (count < 0)
21
+ TCGArg label = op->args[5];
22
-     break;
22
+ int inv = 0;
23
+ fprintf(out, "0x" TARGET_FMT_lx ": ", pc);
23
+
24
+ count = s.info.print_insn(pc, &s.info);
24
+ if (i >= 0) {
25
+ fprintf(out, "\n");
25
+ goto do_brcond_const;
26
+ if (count < 0) {
26
+ }
27
+
28
+ switch (cond) {
29
+ case TCG_COND_LT:
30
+ case TCG_COND_GE:
31
+ /*
32
+ * Simplify LT/GE comparisons vs zero to a single compare
33
+ * vs the high word of the input.
34
+ */
35
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
36
+ arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
37
+ goto do_brcond_high;
38
+ }
39
+ break;
40
+
41
+ case TCG_COND_NE:
42
+ inv = 1;
43
+ QEMU_FALLTHROUGH;
44
+ case TCG_COND_EQ:
45
+ /*
46
+ * Simplify EQ/NE comparisons where one of the pairs
47
+ * can be simplified.
48
+ */
49
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
50
+ op->args[2], cond);
51
+ switch (i ^ inv) {
52
+ case 0:
53
+ goto do_brcond_const;
54
+ case 1:
55
+ goto do_brcond_high;
56
+ }
57
+
58
+ i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
59
+ op->args[3], cond);
60
+ switch (i ^ inv) {
61
+ case 0:
62
+ goto do_brcond_const;
63
+ case 1:
64
+ op->opc = INDEX_op_brcond_i32;
65
+ op->args[1] = op->args[2];
66
+ op->args[2] = cond;
67
+ op->args[3] = label;
68
+ break;
27
+ break;
69
+ }
28
+ }
70
+ break;
29
if (size < count) {
71
+
30
fprintf(out,
72
+ default:
31
"Disassembler disagrees with translator over instruction "
73
+ break;
74
+
75
+ do_brcond_high:
76
+ op->opc = INDEX_op_brcond_i32;
77
+ op->args[0] = op->args[1];
78
+ op->args[1] = op->args[3];
79
+ op->args[2] = cond;
80
+ op->args[3] = label;
81
+ break;
82
+
83
+ do_brcond_const:
84
+ if (i == 0) {
85
+ tcg_op_remove(ctx->tcg, op);
86
+ return true;
87
+ }
88
+ op->opc = INDEX_op_br;
89
+ op->args[0] = label;
90
+ break;
91
+ }
92
+ return false;
93
+}
94
+
95
static bool fold_call(OptContext *ctx, TCGOp *op)
96
{
97
TCGContext *s = ctx->tcg;
98
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
99
}
100
break;
101
102
- case INDEX_op_brcond2_i32:
103
- i = do_constant_folding_cond2(&op->args[0], &op->args[2],
104
- op->args[4]);
105
- if (i == 0) {
106
- do_brcond_false:
107
- tcg_op_remove(s, op);
108
- continue;
109
- }
110
- if (i > 0) {
111
- do_brcond_true:
112
- op->opc = opc = INDEX_op_br;
113
- op->args[0] = op->args[5];
114
- break;
115
- }
116
- if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
117
- && arg_is_const(op->args[2])
118
- && arg_info(op->args[2])->val == 0
119
- && arg_is_const(op->args[3])
120
- && arg_info(op->args[3])->val == 0) {
121
- /* Simplify LT/GE comparisons vs zero to a single compare
122
- vs the high word of the input. */
123
- do_brcond_high:
124
- op->opc = opc = INDEX_op_brcond_i32;
125
- op->args[0] = op->args[1];
126
- op->args[1] = op->args[3];
127
- op->args[2] = op->args[4];
128
- op->args[3] = op->args[5];
129
- break;
130
- }
131
- if (op->args[4] == TCG_COND_EQ) {
132
- /* Simplify EQ comparisons where one of the pairs
133
- can be simplified. */
134
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
135
- op->args[0], op->args[2],
136
- TCG_COND_EQ);
137
- if (i == 0) {
138
- goto do_brcond_false;
139
- } else if (i > 0) {
140
- goto do_brcond_high;
141
- }
142
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
143
- op->args[1], op->args[3],
144
- TCG_COND_EQ);
145
- if (i == 0) {
146
- goto do_brcond_false;
147
- } else if (i < 0) {
148
- break;
149
- }
150
- do_brcond_low:
151
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
152
- op->opc = INDEX_op_brcond_i32;
153
- op->args[1] = op->args[2];
154
- op->args[2] = op->args[4];
155
- op->args[3] = op->args[5];
156
- break;
157
- }
158
- if (op->args[4] == TCG_COND_NE) {
159
- /* Simplify NE comparisons where one of the pairs
160
- can be simplified. */
161
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
162
- op->args[0], op->args[2],
163
- TCG_COND_NE);
164
- if (i == 0) {
165
- goto do_brcond_high;
166
- } else if (i > 0) {
167
- goto do_brcond_true;
168
- }
169
- i = do_constant_folding_cond(INDEX_op_brcond_i32,
170
- op->args[1], op->args[3],
171
- TCG_COND_NE);
172
- if (i == 0) {
173
- goto do_brcond_low;
174
- } else if (i > 0) {
175
- goto do_brcond_true;
176
- }
177
- }
178
- break;
179
-
180
default:
181
break;
182
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
CASE_OP_32_64_VEC(andc):
185
done = fold_andc(&ctx, op);
186
break;
187
+ case INDEX_op_brcond2_i32:
188
+ done = fold_brcond2(&ctx, op);
189
+ break;
190
CASE_OP_32_64(ctpop):
191
done = fold_ctpop(&ctx, op);
192
break;
193
--
32
--
194
2.25.1
33
2.34.1
195
196
1
Recognize the identity function for low-part multiply.
1
Reviewed-by: Thomas Huth <thuth@redhat.com>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Message-Id: <20230503072331.1747057-80-richard.henderson@linaro.org>
4
---
5
meson.build | 3 ---
6
disas.c => disas/disas.c | 0
7
disas/meson.build | 4 +++-
8
3 files changed, 3 insertions(+), 4 deletions(-)
9
rename disas.c => disas/disas.c (100%)
2
10
3
Suggested-by: Luis Pires <luis.pires@eldorado.org.br>
11
diff --git a/meson.build b/meson.build
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 3 ++-
9
1 file changed, 2 insertions(+), 1 deletion(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/meson.build
14
+++ b/tcg/optimize.c
14
+++ b/meson.build
15
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ specific_ss.add(files('cpu.c'))
16
static bool fold_mul(OptContext *ctx, TCGOp *op)
16
17
{
17
subdir('softmmu')
18
if (fold_const2(ctx, op) ||
18
19
- fold_xi_to_i(ctx, op, 0)) {
19
-common_ss.add(capstone)
20
+ fold_xi_to_i(ctx, op, 0) ||
20
-specific_ss.add(files('disas.c'), capstone)
21
+ fold_xi_to_x(ctx, op, 1)) {
21
-
22
return true;
22
# Work around a gcc bug/misfeature wherein constant propagation looks
23
}
23
# through an alias:
24
return false;
24
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99696
25
diff --git a/disas.c b/disas/disas.c
26
similarity index 100%
27
rename from disas.c
28
rename to disas/disas.c
29
diff --git a/disas/meson.build b/disas/meson.build
30
index XXXXXXX..XXXXXXX 100644
31
--- a/disas/meson.build
32
+++ b/disas/meson.build
33
@@ -XXX,XX +XXX,XX @@ common_ss.add(when: 'CONFIG_RISCV_DIS', if_true: files('riscv.c'))
34
common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
35
common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
36
common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
37
-common_ss.add(when: capstone, if_true: files('capstone.c'))
38
+common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
39
+
40
+specific_ss.add(files('disas.c'), capstone)
25
--
41
--
26
2.25.1
42
2.34.1
27
28
1
Provide what will become a larger context for splitting
1
Use uint64_t for the pc, and size_t for the size.
2
the very large tcg_optimize function.
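The refactoring pattern is the usual one for breaking up an oversized function: collect what used to be its local state into a context structure and hand a pointer to it to each split-out helper. A generic sketch of the shape, with illustrative fields only (the real OptContext begins life holding just temps_used, as the hunk below shows):

    #include <stdint.h>
    #include <stdio.h>

    /* State that would otherwise be locals of one huge function. */
    typedef struct OptCtx {
        uint64_t temps_used;        /* bitmask standing in for TCGTempSet */
        int folds_done;
    } OptCtx;

    /* Each split-out step takes the context instead of many parameters. */
    static void mark_temp_used(OptCtx *ctx, unsigned idx)
    {
        ctx->temps_used |= UINT64_C(1) << idx;
    }

    static void fold_one_op(OptCtx *ctx, unsigned dest_idx)
    {
        mark_temp_used(ctx, dest_idx);
        ctx->folds_done++;
    }

    int main(void)
    {
        OptCtx ctx = {0};           /* one initialization, shared by helpers */
        fold_one_op(&ctx, 3);
        fold_one_op(&ctx, 7);
        printf("folds=%d mask=%#llx\n", ctx.folds_done,
               (unsigned long long)ctx.temps_used);
        return 0;
    }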
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Reviewed-by: Thomas Huth <thuth@redhat.com>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Message-Id: <20230503072331.1747057-81-richard.henderson@linaro.org>
8
---
6
---
9
tcg/optimize.c | 77 ++++++++++++++++++++++++++------------------------
7
include/disas/disas.h | 17 ++++++-----------
10
1 file changed, 40 insertions(+), 37 deletions(-)
8
bsd-user/elfload.c | 5 +++--
9
disas/disas.c | 19 +++++++++----------
10
linux-user/elfload.c | 5 +++--
11
4 files changed, 21 insertions(+), 25 deletions(-)
11
12
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/include/disas/disas.h b/include/disas/disas.h
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
--- a/include/disas/disas.h
15
+++ b/tcg/optimize.c
16
+++ b/include/disas/disas.h
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
17
@@ -XXX,XX +XXX,XX @@
17
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
18
#include "cpu.h"
18
} TempOptInfo;
19
19
20
/* Disassemble this for me please... (debugging). */
20
+typedef struct OptContext {
21
-void disas(FILE *out, const void *code, unsigned long size);
21
+ TCGTempSet temps_used;
22
-void target_disas(FILE *out, CPUState *cpu, target_ulong code,
22
+} OptContext;
23
- target_ulong size);
24
+void disas(FILE *out, const void *code, size_t size);
25
+void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size);
26
27
-void monitor_disas(Monitor *mon, CPUState *cpu,
28
- target_ulong pc, int nb_insn, int is_physical);
29
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
30
+ int nb_insn, bool is_physical);
31
32
char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
33
34
/* Look up symbol for debugging purpose. Returns "" if unknown. */
35
-const char *lookup_symbol(target_ulong orig_addr);
36
+const char *lookup_symbol(uint64_t orig_addr);
37
#endif
38
39
struct syminfo;
40
struct elf32_sym;
41
struct elf64_sym;
42
43
-#if defined(CONFIG_USER_ONLY)
44
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, target_ulong orig_addr);
45
-#else
46
-typedef const char *(*lookup_symbol_t)(struct syminfo *s, hwaddr orig_addr);
47
-#endif
48
+typedef const char *(*lookup_symbol_t)(struct syminfo *s, uint64_t orig_addr);
49
50
struct syminfo {
51
lookup_symbol_t lookup_symbol;
52
diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/bsd-user/elfload.c
55
+++ b/bsd-user/elfload.c
56
@@ -XXX,XX +XXX,XX @@ static abi_ulong load_elf_interp(struct elfhdr *interp_elf_ex,
57
58
static int symfind(const void *s0, const void *s1)
59
{
60
- target_ulong addr = *(target_ulong *)s0;
61
+ __typeof(sym->st_value) addr = *(uint64_t *)s0;
62
struct elf_sym *sym = (struct elf_sym *)s1;
63
int result = 0;
23
+
64
+
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
65
if (addr < sym->st_value) {
66
result = -1;
67
} else if (addr >= sym->st_value + sym->st_size) {
68
@@ -XXX,XX +XXX,XX @@ static int symfind(const void *s0, const void *s1)
69
return result;
70
}
71
72
-static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
73
+static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
25
{
74
{
26
return ts->state_ptr;
75
#if ELF_CLASS == ELFCLASS32
27
@@ -XXX,XX +XXX,XX @@ static void reset_temp(TCGArg arg)
76
struct elf_sym *syms = s->disas_symtab.elf32;
77
diff --git a/disas/disas.c b/disas/disas.c
78
index XXXXXXX..XXXXXXX 100644
79
--- a/disas/disas.c
80
+++ b/disas/disas.c
81
@@ -XXX,XX +XXX,XX @@ static void initialize_debug_host(CPUDebug *s)
28
}
82
}
29
83
30
/* Initialize and activate a temporary. */
84
/* Disassemble this for me please... (debugging). */
31
-static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
85
-void target_disas(FILE *out, CPUState *cpu, target_ulong code,
32
+static void init_ts_info(OptContext *ctx, TCGTemp *ts)
86
- target_ulong size)
87
+void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
33
{
88
{
34
size_t idx = temp_idx(ts);
89
- target_ulong pc;
35
TempOptInfo *ti;
90
+ uint64_t pc;
36
91
int count;
37
- if (test_bit(idx, temps_used->l)) {
92
CPUDebug s;
38
+ if (test_bit(idx, ctx->temps_used.l)) {
93
94
@@ -XXX,XX +XXX,XX @@ void target_disas(FILE *out, CPUState *cpu, target_ulong code,
95
}
96
97
for (pc = code; size > 0; pc += count, size -= count) {
98
- fprintf(out, "0x" TARGET_FMT_lx ": ", pc);
99
+ fprintf(out, "0x%08" PRIx64 ": ", pc);
100
count = s.info.print_insn(pc, &s.info);
101
fprintf(out, "\n");
102
if (count < 0) {
103
@@ -XXX,XX +XXX,XX @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
104
}
105
106
/* Disassemble this for me please... (debugging). */
107
-void disas(FILE *out, const void *code, unsigned long size)
108
+void disas(FILE *out, const void *code, size_t size)
109
{
110
uintptr_t pc;
111
int count;
112
@@ -XXX,XX +XXX,XX @@ void disas(FILE *out, const void *code, unsigned long size)
113
}
114
115
/* Look up symbol for debugging purpose. Returns "" if unknown. */
116
-const char *lookup_symbol(target_ulong orig_addr)
117
+const char *lookup_symbol(uint64_t orig_addr)
118
{
119
const char *symbol = "";
120
struct syminfo *s;
121
@@ -XXX,XX +XXX,XX @@ physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
122
}
123
124
/* Disassembler for the monitor. */
125
-void monitor_disas(Monitor *mon, CPUState *cpu,
126
- target_ulong pc, int nb_insn, int is_physical)
127
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
128
+ int nb_insn, bool is_physical)
129
{
130
int count, i;
131
CPUDebug s;
132
@@ -XXX,XX +XXX,XX @@ void monitor_disas(Monitor *mon, CPUState *cpu,
133
}
134
135
if (!s.info.print_insn) {
136
- monitor_printf(mon, "0x" TARGET_FMT_lx
137
+ monitor_printf(mon, "0x%08" PRIx64
138
": Asm output not supported on this arch\n", pc);
39
return;
139
return;
40
}
140
}
41
- set_bit(idx, temps_used->l);
141
42
+ set_bit(idx, ctx->temps_used.l);
142
for (i = 0; i < nb_insn; i++) {
43
143
- g_string_append_printf(ds, "0x" TARGET_FMT_lx ": ", pc);
44
ti = ts->state_ptr;
144
+ g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc);
45
if (ti == NULL) {
145
count = s.info.print_insn(pc, &s.info);
46
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
146
g_string_append_c(ds, '\n');
47
}
147
if (count < 0) {
148
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
149
index XXXXXXX..XXXXXXX 100644
150
--- a/linux-user/elfload.c
151
+++ b/linux-user/elfload.c
152
@@ -XXX,XX +XXX,XX @@ static void load_elf_interp(const char *filename, struct image_info *info,
153
154
static int symfind(const void *s0, const void *s1)
155
{
156
- target_ulong addr = *(target_ulong *)s0;
157
struct elf_sym *sym = (struct elf_sym *)s1;
158
+ __typeof(sym->st_value) addr = *(uint64_t *)s0;
159
int result = 0;
160
+
161
if (addr < sym->st_value) {
162
result = -1;
163
} else if (addr >= sym->st_value + sym->st_size) {
164
@@ -XXX,XX +XXX,XX @@ static int symfind(const void *s0, const void *s1)
165
return result;
48
}
166
}
49
167
50
-static void init_arg_info(TCGTempSet *temps_used, TCGArg arg)
168
-static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
51
+static void init_arg_info(OptContext *ctx, TCGArg arg)
169
+static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
52
{
170
{
53
- init_ts_info(temps_used, arg_temp(arg));
171
#if ELF_CLASS == ELFCLASS32
54
+ init_ts_info(ctx, arg_temp(arg));
172
struct elf_sym *syms = s->disas_symtab.elf32;
55
}
56
57
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
58
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
59
}
60
}
61
62
-static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
63
+static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
64
TCGOp *op, TCGArg dst, uint64_t val)
65
{
66
const TCGOpDef *def = &tcg_op_defs[op->opc];
67
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, TCGTempSet *temps_used,
68
69
/* Convert movi to mov with constant temp. */
70
tv = tcg_constant_internal(type, val);
71
- init_ts_info(temps_used, tv);
72
+ init_ts_info(ctx, tv);
73
tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
74
}
75
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
77
{
78
int nb_temps, nb_globals, i;
79
TCGOp *op, *op_next, *prev_mb = NULL;
80
- TCGTempSet temps_used;
81
+ OptContext ctx = {};
82
83
/* Array VALS has an element for each temp.
84
If this temp holds a constant then its value is kept in VALS' element.
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
nb_temps = s->nb_temps;
87
nb_globals = s->nb_globals;
88
89
- memset(&temps_used, 0, sizeof(temps_used));
90
for (i = 0; i < nb_temps; ++i) {
91
s->temps[i].state_ptr = NULL;
92
}
93
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
94
for (i = 0; i < nb_oargs + nb_iargs; i++) {
95
TCGTemp *ts = arg_temp(op->args[i]);
96
if (ts) {
97
- init_ts_info(&temps_used, ts);
98
+ init_ts_info(&ctx, ts);
99
}
100
}
101
} else {
102
nb_oargs = def->nb_oargs;
103
nb_iargs = def->nb_iargs;
104
for (i = 0; i < nb_oargs + nb_iargs; i++) {
105
- init_arg_info(&temps_used, op->args[i]);
106
+ init_arg_info(&ctx, op->args[i]);
107
}
108
}
109
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
CASE_OP_32_64(rotr):
112
if (arg_is_const(op->args[1])
113
&& arg_info(op->args[1])->val == 0) {
114
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
115
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
116
continue;
117
}
118
break;
119
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
120
121
if (partmask == 0) {
122
tcg_debug_assert(nb_oargs == 1);
123
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
124
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
125
continue;
126
}
127
if (affected == 0) {
128
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
129
CASE_OP_32_64(mulsh):
130
if (arg_is_const(op->args[2])
131
&& arg_info(op->args[2])->val == 0) {
132
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
133
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
134
continue;
135
}
136
break;
137
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
138
CASE_OP_32_64_VEC(sub):
139
CASE_OP_32_64_VEC(xor):
140
if (args_are_copies(op->args[1], op->args[2])) {
141
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], 0);
142
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
143
continue;
144
}
145
break;
146
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
147
if (arg_is_const(op->args[1])) {
148
tmp = arg_info(op->args[1])->val;
149
tmp = dup_const(TCGOP_VECE(op), tmp);
150
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
151
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
152
break;
153
}
154
goto do_default;
155
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
156
case INDEX_op_dup2_vec:
157
assert(TCG_TARGET_REG_BITS == 32);
158
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
159
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0],
160
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0],
161
deposit64(arg_info(op->args[1])->val, 32, 32,
162
arg_info(op->args[2])->val));
163
break;
164
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
165
case INDEX_op_extrh_i64_i32:
166
if (arg_is_const(op->args[1])) {
167
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
168
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
169
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
break;
171
}
172
goto do_default;
173
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
174
if (arg_is_const(op->args[1])) {
175
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
176
op->args[2]);
177
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
178
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
break;
180
}
181
goto do_default;
182
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
183
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
184
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
185
arg_info(op->args[2])->val);
186
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
187
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
break;
189
}
190
goto do_default;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
TCGArg v = arg_info(op->args[1])->val;
193
if (v != 0) {
194
tmp = do_constant_folding(opc, v, 0);
195
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
196
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
} else {
198
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
199
}
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
tmp = deposit64(arg_info(op->args[1])->val,
202
op->args[3], op->args[4],
203
arg_info(op->args[2])->val);
204
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
205
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
206
break;
207
}
208
goto do_default;
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
if (arg_is_const(op->args[1])) {
211
tmp = extract64(arg_info(op->args[1])->val,
212
op->args[2], op->args[3]);
213
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
214
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
215
break;
216
}
217
goto do_default;
218
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
219
if (arg_is_const(op->args[1])) {
220
tmp = sextract64(arg_info(op->args[1])->val,
221
op->args[2], op->args[3]);
222
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
223
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
224
break;
225
}
226
goto do_default;
227
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
228
tmp = (int32_t)(((uint32_t)v1 >> shr) |
229
((uint32_t)v2 << (32 - shr)));
230
}
231
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
232
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
233
break;
234
}
235
goto do_default;
236
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
237
tmp = do_constant_folding_cond(opc, op->args[1],
238
op->args[2], op->args[3]);
239
if (tmp != 2) {
240
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
241
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
242
break;
243
}
244
goto do_default;
245
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
246
op->args[1], op->args[2]);
247
if (tmp != 2) {
248
if (tmp) {
249
- memset(&temps_used, 0, sizeof(temps_used));
250
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
251
op->opc = INDEX_op_br;
252
op->args[0] = op->args[3];
253
} else {
254
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
255
256
rl = op->args[0];
257
rh = op->args[1];
258
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)a);
259
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(a >> 32));
260
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
261
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
262
break;
263
}
264
goto do_default;
265
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
266
267
rl = op->args[0];
268
rh = op->args[1];
269
- tcg_opt_gen_movi(s, &temps_used, op, rl, (int32_t)r);
270
- tcg_opt_gen_movi(s, &temps_used, op2, rh, (int32_t)(r >> 32));
271
+ tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
272
+ tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
273
break;
274
}
275
goto do_default;
276
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
277
if (tmp != 2) {
278
if (tmp) {
279
do_brcond_true:
280
- memset(&temps_used, 0, sizeof(temps_used));
281
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
282
op->opc = INDEX_op_br;
283
op->args[0] = op->args[5];
284
} else {
285
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
286
/* Simplify LT/GE comparisons vs zero to a single compare
287
vs the high word of the input. */
288
do_brcond_high:
289
- memset(&temps_used, 0, sizeof(temps_used));
290
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
291
op->opc = INDEX_op_brcond_i32;
292
op->args[0] = op->args[1];
293
op->args[1] = op->args[3];
294
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
295
goto do_default;
296
}
297
do_brcond_low:
298
- memset(&temps_used, 0, sizeof(temps_used));
299
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
300
op->opc = INDEX_op_brcond_i32;
301
op->args[1] = op->args[2];
302
op->args[2] = op->args[4];
303
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
304
op->args[5]);
305
if (tmp != 2) {
306
do_setcond_const:
307
- tcg_opt_gen_movi(s, &temps_used, op, op->args[0], tmp);
308
+ tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
309
} else if ((op->args[5] == TCG_COND_LT
310
|| op->args[5] == TCG_COND_GE)
311
&& arg_is_const(op->args[3])
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
if (!(tcg_call_flags(op)
314
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
315
for (i = 0; i < nb_globals; i++) {
316
- if (test_bit(i, temps_used.l)) {
317
+ if (test_bit(i, ctx.temps_used.l)) {
318
reset_ts(&s->temps[i]);
319
}
320
}
321
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
322
block, otherwise we only trash the output args. "z_mask" is
323
the non-zero bits mask for the first output arg. */
324
if (def->flags & TCG_OPF_BB_END) {
325
- memset(&temps_used, 0, sizeof(temps_used));
326
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
327
} else {
328
do_reset_output:
329
for (i = 0; i < nb_oargs; i++) {
330
--
173
--
331
2.25.1
174
2.34.1
332
333
1
Most of these are handled by creating a fold_const2_commutative
1
Reviewed-by: Thomas Huth <thuth@redhat.com>
2
to handle all of the binary operators. The rest were already
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
handled on a case-by-case basis in the switch, and have their
3
Message-Id: <20230503072331.1747057-83-richard.henderson@linaro.org>
4
own fold function in which to place the call.
4
---
5
include/disas/disas.h | 6 ------
6
disas/disas.c | 3 ++-
7
2 files changed, 2 insertions(+), 7 deletions(-)
5
8
6
We now have only one major switch on TCGOpcode.
9
diff --git a/include/disas/disas.h b/include/disas/disas.h
7
8
Introduce NO_DEST and a block comment for swap_commutative in
9
order to make the handling of brcond and movcond opcodes cleaner.
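The canonicalization being sunk into the fold functions is the standard one for commutative operations: if the first operand is a constant and the second is not, swap them, and for comparisons swap the condition to match, so later code only ever looks for a constant in the second slot. A stand-alone sketch of the idea, separate from the tcg/optimize implementation:

    #include <assert.h>
    #include <stdbool.h>

    typedef enum { COND_LT, COND_GT, COND_EQ } Cond;

    typedef struct {
        bool is_const;
        long value_or_reg;
    } Arg;

    /* Swapping comparison operands mirrors the condition: a < b <=> b > a. */
    static Cond swap_cond(Cond c)
    {
        switch (c) {
        case COND_LT: return COND_GT;
        case COND_GT: return COND_LT;
        default:      return c;         /* EQ is symmetric */
        }
    }

    /* Put the constant (if any) in the second slot; report whether we swapped. */
    static bool canonicalize(Arg *a, Arg *b, Cond *cond)
    {
        if (a->is_const && !b->is_const) {
            Arg tmp = *a;
            *a = *b;
            *b = tmp;
            *cond = swap_cond(*cond);
            return true;
        }
        return false;
    }

    int main(void)
    {
        Arg x = { .is_const = true,  .value_or_reg = 4 };
        Arg y = { .is_const = false, .value_or_reg = 1 };
        Cond c = COND_LT;               /* "4 < reg" becomes "reg > 4" */
        assert(canonicalize(&x, &y, &c));
        assert(!x.is_const && y.is_const && c == COND_GT);
        return 0;
    }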
10
11
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
12
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
13
---
14
tcg/optimize.c | 142 ++++++++++++++++++++++++-------------------------
15
1 file changed, 70 insertions(+), 72 deletions(-)
16
17
diff --git a/tcg/optimize.c b/tcg/optimize.c
18
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
19
--- a/tcg/optimize.c
11
--- a/include/disas/disas.h
20
+++ b/tcg/optimize.c
12
+++ b/include/disas/disas.h
21
@@ -XXX,XX +XXX,XX @@ static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
13
@@ -XXX,XX +XXX,XX @@
22
return -1;
14
#ifndef QEMU_DISAS_H
23
}
15
#define QEMU_DISAS_H
24
16
25
+/**
17
-#include "exec/hwaddr.h"
26
+ * swap_commutative:
27
+ * @dest: TCGArg of the destination argument, or NO_DEST.
28
+ * @p1: first paired argument
29
+ * @p2: second paired argument
30
+ *
31
+ * If *@p1 is a constant and *@p2 is not, swap.
32
+ * If *@p2 matches @dest, swap.
33
+ * Return true if a swap was performed.
34
+ */
35
+
36
+#define NO_DEST temp_arg(NULL)
37
+
38
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
39
{
40
TCGArg a1 = *p1, a2 = *p2;
41
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
42
return false;
43
}
44
45
+static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
46
+{
47
+ swap_commutative(op->args[0], &op->args[1], &op->args[2]);
48
+ return fold_const2(ctx, op);
49
+}
50
+
51
static bool fold_masks(OptContext *ctx, TCGOp *op)
52
{
53
uint64_t a_mask = ctx->a_mask;
54
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
55
56
static bool fold_add(OptContext *ctx, TCGOp *op)
57
{
58
- if (fold_const2(ctx, op) ||
59
+ if (fold_const2_commutative(ctx, op) ||
60
fold_xi_to_x(ctx, op, 0)) {
61
return true;
62
}
63
@@ -XXX,XX +XXX,XX @@ static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
64
65
static bool fold_add2(OptContext *ctx, TCGOp *op)
66
{
67
+ /* Note that the high and low parts may be independently swapped. */
68
+ swap_commutative(op->args[0], &op->args[2], &op->args[4]);
69
+ swap_commutative(op->args[1], &op->args[3], &op->args[5]);
70
+
71
return fold_addsub2(ctx, op, true);
72
}
73
74
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
75
{
76
uint64_t z1, z2;
77
78
- if (fold_const2(ctx, op) ||
79
+ if (fold_const2_commutative(ctx, op) ||
80
fold_xi_to_i(ctx, op, 0) ||
81
fold_xi_to_x(ctx, op, -1) ||
82
fold_xx_to_x(ctx, op)) {
83
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
84
static bool fold_brcond(OptContext *ctx, TCGOp *op)
85
{
86
TCGCond cond = op->args[2];
87
- int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
88
+ int i;
89
90
+ if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
91
+ op->args[2] = cond = tcg_swap_cond(cond);
92
+ }
93
+
94
+ i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
95
if (i == 0) {
96
tcg_op_remove(ctx->tcg, op);
97
return true;
98
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond(OptContext *ctx, TCGOp *op)
99
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
100
{
101
TCGCond cond = op->args[4];
102
- int i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
103
TCGArg label = op->args[5];
104
- int inv = 0;
105
+ int i, inv = 0;
106
107
+ if (swap_commutative2(&op->args[0], &op->args[2])) {
108
+ op->args[4] = cond = tcg_swap_cond(cond);
109
+ }
110
+
111
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
112
if (i >= 0) {
113
goto do_brcond_const;
114
}
115
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
116
117
static bool fold_eqv(OptContext *ctx, TCGOp *op)
118
{
119
- if (fold_const2(ctx, op) ||
120
+ if (fold_const2_commutative(ctx, op) ||
121
fold_xi_to_x(ctx, op, -1) ||
122
fold_xi_to_not(ctx, op, 0)) {
123
return true;
124
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
125
static bool fold_movcond(OptContext *ctx, TCGOp *op)
126
{
127
TCGCond cond = op->args[5];
128
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
129
+ int i;
130
131
+ if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
132
+ op->args[5] = cond = tcg_swap_cond(cond);
133
+ }
134
+ /*
135
+ * Canonicalize the "false" input reg to match the destination reg so
136
+ * that the tcg backend can implement a "move if true" operation.
137
+ */
138
+ if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
139
+ op->args[5] = cond = tcg_invert_cond(cond);
140
+ }
141
+
142
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
143
if (i >= 0) {
144
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
145
}
146
@@ -XXX,XX +XXX,XX @@ static bool fold_mul(OptContext *ctx, TCGOp *op)
147
148
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
149
{
150
- if (fold_const2(ctx, op) ||
151
+ if (fold_const2_commutative(ctx, op) ||
152
fold_xi_to_i(ctx, op, 0)) {
153
return true;
154
}
155
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
156
157
static bool fold_multiply2(OptContext *ctx, TCGOp *op)
158
{
159
+ swap_commutative(op->args[0], &op->args[2], &op->args[3]);
160
+
161
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
162
uint64_t a = arg_info(op->args[2])->val;
163
uint64_t b = arg_info(op->args[3])->val;
164
@@ -XXX,XX +XXX,XX @@ static bool fold_multiply2(OptContext *ctx, TCGOp *op)
165
166
static bool fold_nand(OptContext *ctx, TCGOp *op)
167
{
168
- if (fold_const2(ctx, op) ||
169
+ if (fold_const2_commutative(ctx, op) ||
170
fold_xi_to_not(ctx, op, -1)) {
171
return true;
172
}
173
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
174
175
static bool fold_nor(OptContext *ctx, TCGOp *op)
176
{
177
- if (fold_const2(ctx, op) ||
178
+ if (fold_const2_commutative(ctx, op) ||
179
fold_xi_to_not(ctx, op, 0)) {
180
return true;
181
}
182
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
183
184
static bool fold_or(OptContext *ctx, TCGOp *op)
185
{
186
- if (fold_const2(ctx, op) ||
187
+ if (fold_const2_commutative(ctx, op) ||
188
fold_xi_to_x(ctx, op, 0) ||
189
fold_xx_to_x(ctx, op)) {
190
return true;
191
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
192
static bool fold_setcond(OptContext *ctx, TCGOp *op)
193
{
194
TCGCond cond = op->args[3];
195
- int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
196
+ int i;
197
198
+ if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
199
+ op->args[3] = cond = tcg_swap_cond(cond);
200
+ }
201
+
202
+ i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
203
if (i >= 0) {
204
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
205
}
206
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
207
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
208
{
209
TCGCond cond = op->args[5];
210
- int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
211
- int inv = 0;
212
+ int i, inv = 0;
213
214
+ if (swap_commutative2(&op->args[1], &op->args[3])) {
215
+ op->args[5] = cond = tcg_swap_cond(cond);
216
+ }
217
+
218
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
219
if (i >= 0) {
220
goto do_setcond_const;
221
}
222
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
223
224
static bool fold_xor(OptContext *ctx, TCGOp *op)
225
{
226
- if (fold_const2(ctx, op) ||
227
+ if (fold_const2_commutative(ctx, op) ||
228
fold_xx_to_i(ctx, op, 0) ||
229
fold_xi_to_x(ctx, op, 0) ||
230
fold_xi_to_not(ctx, op, -1)) {
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
ctx.type = TCG_TYPE_I32;
233
}
234
235
- /* For commutative operations make constant second argument */
236
- switch (opc) {
237
- CASE_OP_32_64_VEC(add):
238
- CASE_OP_32_64_VEC(mul):
239
- CASE_OP_32_64_VEC(and):
240
- CASE_OP_32_64_VEC(or):
241
- CASE_OP_32_64_VEC(xor):
242
- CASE_OP_32_64(eqv):
243
- CASE_OP_32_64(nand):
244
- CASE_OP_32_64(nor):
245
- CASE_OP_32_64(muluh):
246
- CASE_OP_32_64(mulsh):
247
- swap_commutative(op->args[0], &op->args[1], &op->args[2]);
248
- break;
249
- CASE_OP_32_64(brcond):
250
- if (swap_commutative(-1, &op->args[0], &op->args[1])) {
251
- op->args[2] = tcg_swap_cond(op->args[2]);
252
- }
253
- break;
254
- CASE_OP_32_64(setcond):
255
- if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
256
- op->args[3] = tcg_swap_cond(op->args[3]);
257
- }
258
- break;
259
- CASE_OP_32_64(movcond):
260
- if (swap_commutative(-1, &op->args[1], &op->args[2])) {
261
- op->args[5] = tcg_swap_cond(op->args[5]);
262
- }
263
- /* For movcond, we canonicalize the "false" input reg to match
264
- the destination reg so that the tcg backend can implement
265
- a "move if true" operation. */
266
- if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
267
- op->args[5] = tcg_invert_cond(op->args[5]);
268
- }
269
- break;
270
- CASE_OP_32_64(add2):
271
- swap_commutative(op->args[0], &op->args[2], &op->args[4]);
272
- swap_commutative(op->args[1], &op->args[3], &op->args[5]);
273
- break;
274
- CASE_OP_32_64(mulu2):
275
- CASE_OP_32_64(muls2):
276
- swap_commutative(op->args[0], &op->args[2], &op->args[3]);
277
- break;
278
- case INDEX_op_brcond2_i32:
279
- if (swap_commutative2(&op->args[0], &op->args[2])) {
280
- op->args[4] = tcg_swap_cond(op->args[4]);
281
- }
282
- break;
283
- case INDEX_op_setcond2_i32:
284
- if (swap_commutative2(&op->args[1], &op->args[3])) {
285
- op->args[5] = tcg_swap_cond(op->args[5]);
286
- }
287
- break;
288
- default:
289
- break;
290
- }
291
-
18
-
292
/* Assume all bits affected, and no bits known zero. */
19
-#ifdef NEED_CPU_H
293
ctx.a_mask = -1;
20
-#include "cpu.h"
294
ctx.z_mask = -1;
21
-
22
/* Disassemble this for me please... (debugging). */
23
void disas(FILE *out, const void *code, size_t size);
24
void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size);
25
@@ -XXX,XX +XXX,XX @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size);
26
27
/* Look up symbol for debugging purpose. Returns "" if unknown. */
28
const char *lookup_symbol(uint64_t orig_addr);
29
-#endif
30
31
struct syminfo;
32
struct elf32_sym;
33
diff --git a/disas/disas.c b/disas/disas.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/disas/disas.c
36
+++ b/disas/disas.c
37
@@ -XXX,XX +XXX,XX @@
38
#include "disas/dis-asm.h"
39
#include "elf.h"
40
#include "qemu/qemu-print.h"
41
-
42
#include "disas/disas.h"
43
#include "disas/capstone.h"
44
+#include "hw/core/cpu.h"
45
+#include "exec/memory.h"
46
47
typedef struct CPUDebug {
48
struct disassemble_info info;
295
--
49
--
296
2.25.1
50
2.34.1
297
298
1
From: Luis Pires <luis.pires@eldorado.org.br>
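For the unsigned entries in the tables added below, each row is (dividend high, dividend low, expected quotient high, expected quotient low, divisor, expected remainder). Where the compiler provides unsigned __int128 (an assumption; e.g. GCC or Clang on a 64-bit host), the expectations can be cross-checked against a reference model such as:

    #include <assert.h>
    #include <stdint.h>

    /* Reference 128/64 division: 128-bit quotient plus 64-bit remainder. */
    static void ref_divu128(uint64_t hi, uint64_t lo, uint64_t divisor,
                            uint64_t *qhi, uint64_t *qlo, uint64_t *rem)
    {
        unsigned __int128 n = ((unsigned __int128)hi << 64) | lo;
        unsigned __int128 q = n / divisor;

        *qhi = (uint64_t)(q >> 64);
        *qlo = (uint64_t)q;
        *rem = (uint64_t)(n % divisor);
    }

    int main(void)
    {
        uint64_t qhi, qlo, rem;

        /* One vector from the table below: divide by 0x10. */
        ref_divu128(0x123456789abcdefeULL, 0xefedcba987654321ULL,
                    0x0000000000000010ULL, &qhi, &qlo, &rem);
        assert(qhi == 0x0123456789abcdefULL);
        assert(qlo == 0xeefedcba98765432ULL);
        assert(rem == 0x0000000000000001ULL);
        return 0;
    }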
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
3
We'd like to move disas.c into the common code source set, where
4
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
4
CONFIG_USER_ONLY is not available anymore. So we have to move
5
Message-Id: <20211025191154.350831-5-luis.pires@eldorado.org.br>
5
the related code into a separate file instead.
6
7
Signed-off-by: Thomas Huth <thuth@redhat.com>
8
Message-Id: <20230508133745.109463-2-thuth@redhat.com>
9
[rth: Type change done in a separate patch]
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
11
---
8
tests/unit/test-div128.c | 197 +++++++++++++++++++++++++++++++++++++++
12
disas/disas-internal.h | 21 ++++++++++++
9
tests/unit/meson.build | 1 +
13
disas/disas-mon.c | 65 ++++++++++++++++++++++++++++++++++++
10
2 files changed, 198 insertions(+)
14
disas/disas.c | 76 ++++--------------------------------------
11
create mode 100644 tests/unit/test-div128.c
15
disas/meson.build | 1 +
16
4 files changed, 93 insertions(+), 70 deletions(-)
17
create mode 100644 disas/disas-internal.h
18
create mode 100644 disas/disas-mon.c
12
19
13
diff --git a/tests/unit/test-div128.c b/tests/unit/test-div128.c
20
diff --git a/disas/disas-internal.h b/disas/disas-internal.h
14
new file mode 100644
21
new file mode 100644
15
index XXXXXXX..XXXXXXX
22
index XXXXXXX..XXXXXXX
16
--- /dev/null
23
--- /dev/null
17
+++ b/tests/unit/test-div128.c
24
+++ b/disas/disas-internal.h
18
@@ -XXX,XX +XXX,XX @@
25
@@ -XXX,XX +XXX,XX @@
19
+/*
26
+/*
20
+ * Test 128-bit division functions
27
+ * Definitions used internally in the disassembly code
21
+ *
28
+ *
22
+ * Copyright (c) 2021 Instituto de Pesquisas Eldorado (eldorado.org.br)
29
+ * SPDX-License-Identifier: GPL-2.0-or-later
30
+ */
31
+
32
+#ifndef DISAS_INTERNAL_H
33
+#define DISAS_INTERNAL_H
34
+
35
+#include "disas/dis-asm.h"
36
+
37
+typedef struct CPUDebug {
38
+ struct disassemble_info info;
39
+ CPUState *cpu;
40
+} CPUDebug;
41
+
42
+void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu);
43
+int disas_gstring_printf(FILE *stream, const char *fmt, ...)
44
+ G_GNUC_PRINTF(2, 3);
45
+
46
+#endif
47
diff --git a/disas/disas-mon.c b/disas/disas-mon.c
48
new file mode 100644
49
index XXXXXXX..XXXXXXX
50
--- /dev/null
51
+++ b/disas/disas-mon.c
52
@@ -XXX,XX +XXX,XX @@
53
+/*
54
+ * Functions related to disassembly from the monitor
23
+ *
55
+ *
24
+ * This library is free software; you can redistribute it and/or
56
+ * SPDX-License-Identifier: GPL-2.0-or-later
25
+ * modify it under the terms of the GNU Lesser General Public
26
+ * License as published by the Free Software Foundation; either
27
+ * version 2.1 of the License, or (at your option) any later version.
28
+ *
29
+ * This library is distributed in the hope that it will be useful,
30
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
32
+ * Lesser General Public License for more details.
33
+ *
34
+ * You should have received a copy of the GNU Lesser General Public
35
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
36
+ */
57
+ */
37
+
58
+
38
+#include "qemu/osdep.h"
59
+#include "qemu/osdep.h"
39
+#include "qemu/host-utils.h"
60
+#include "disas-internal.h"
40
+
61
+#include "disas/disas.h"
41
+typedef struct {
62
+#include "exec/memory.h"
42
+ uint64_t high;
63
+#include "hw/core/cpu.h"
43
+ uint64_t low;
64
+#include "monitor/monitor.h"
44
+ uint64_t rhigh;
65
+
45
+ uint64_t rlow;
66
+static int
46
+ uint64_t divisor;
67
+physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
47
+ uint64_t remainder;
68
+ struct disassemble_info *info)
48
+} test_data_unsigned;
49
+
50
+typedef struct {
51
+ int64_t high;
52
+ uint64_t low;
53
+ int64_t rhigh;
54
+ uint64_t rlow;
55
+ int64_t divisor;
56
+ int64_t remainder;
57
+} test_data_signed;
58
+
59
+static const test_data_unsigned test_table_unsigned[] = {
60
+ /* Dividend fits in 64 bits */
61
+ { 0x0000000000000000ULL, 0x0000000000000000ULL,
62
+ 0x0000000000000000ULL, 0x0000000000000000ULL,
63
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
64
+ { 0x0000000000000000ULL, 0x0000000000000001ULL,
65
+ 0x0000000000000000ULL, 0x0000000000000001ULL,
66
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
67
+ { 0x0000000000000000ULL, 0x0000000000000003ULL,
68
+ 0x0000000000000000ULL, 0x0000000000000001ULL,
69
+ 0x0000000000000002ULL, 0x0000000000000001ULL},
70
+ { 0x0000000000000000ULL, 0x8000000000000000ULL,
71
+ 0x0000000000000000ULL, 0x8000000000000000ULL,
72
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
73
+ { 0x0000000000000000ULL, 0xa000000000000000ULL,
74
+ 0x0000000000000000ULL, 0x0000000000000002ULL,
75
+ 0x4000000000000000ULL, 0x2000000000000000ULL},
76
+ { 0x0000000000000000ULL, 0x8000000000000000ULL,
77
+ 0x0000000000000000ULL, 0x0000000000000001ULL,
78
+ 0x8000000000000000ULL, 0x0000000000000000ULL},
79
+
80
+ /* Dividend > 64 bits, with MSB 0 */
81
+ { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
82
+ 0x123456789abcdefeULL, 0xefedcba987654321ULL,
83
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
84
+ { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
85
+ 0x0000000000000001ULL, 0x000000000000000dULL,
86
+ 0x123456789abcdefeULL, 0x03456789abcdf03bULL},
87
+ { 0x123456789abcdefeULL, 0xefedcba987654321ULL,
88
+ 0x0123456789abcdefULL, 0xeefedcba98765432ULL,
89
+ 0x0000000000000010ULL, 0x0000000000000001ULL},
90
+
91
+ /* Dividend > 64 bits, with MSB 1 */
92
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
93
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
94
+ 0x0000000000000001ULL, 0x0000000000000000ULL},
95
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
96
+ 0x0000000000000001ULL, 0x0000000000000000ULL,
97
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
98
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
99
+ 0x0feeddccbbaa9988ULL, 0x7766554433221100ULL,
100
+ 0x0000000000000010ULL, 0x000000000000000fULL},
101
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
102
+ 0x000000000000000eULL, 0x00f0f0f0f0f0f35aULL,
103
+ 0x123456789abcdefeULL, 0x0f8922bc55ef90c3ULL},
104
+
105
+ /**
106
+ * Divisor == 64 bits, with MSB 1
107
+ * and high 64 bits of dividend >= divisor
108
+ * (for testing normalization)
109
+ */
110
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
111
+ 0x0000000000000001ULL, 0x0000000000000000ULL,
112
+ 0xfeeddccbbaa99887ULL, 0x766554433221100fULL},
113
+ { 0xfeeddccbbaa99887ULL, 0x766554433221100fULL,
114
+ 0x0000000000000001ULL, 0xfddbb9977553310aULL,
115
+ 0x8000000000000001ULL, 0x78899aabbccddf05ULL},
116
+
117
+ /* Dividend > 64 bits, divisor almost as big */
118
+ { 0x0000000000000001ULL, 0x23456789abcdef01ULL,
119
+ 0x0000000000000000ULL, 0x000000000000000fULL,
120
+ 0x123456789abcdefeULL, 0x123456789abcde1fULL},
121
+};
122
+
123
+static const test_data_signed test_table_signed[] = {
124
+ /* Positive dividend, positive/negative divisors */
125
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
126
+ 0x0000000000000000LL, 0x0000000000bc614eULL,
127
+ 0x0000000000000001LL, 0x0000000000000000LL},
128
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
129
+ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
130
+ 0xffffffffffffffffLL, 0x0000000000000000LL},
131
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
132
+ 0x0000000000000000LL, 0x00000000005e30a7ULL,
133
+ 0x0000000000000002LL, 0x0000000000000000LL},
134
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
135
+ 0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
136
+ 0xfffffffffffffffeLL, 0x0000000000000000LL},
137
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
138
+ 0x0000000000000000LL, 0x0000000000178c29ULL,
139
+ 0x0000000000000008LL, 0x0000000000000006LL},
140
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
141
+ 0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
142
+ 0xfffffffffffffff8LL, 0x0000000000000006LL},
143
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
144
+ 0x0000000000000000LL, 0x000000000000550dULL,
145
+ 0x0000000000000237LL, 0x0000000000000183LL},
146
+ { 0x0000000000000000LL, 0x0000000000bc614eULL,
147
+ 0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
148
+ 0xfffffffffffffdc9LL, 0x0000000000000183LL},
149
+
150
+ /* Negative dividend, positive/negative divisors */
151
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
152
+ 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
153
+ 0x0000000000000001LL, 0x0000000000000000LL},
154
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
155
+ 0x0000000000000000LL, 0x0000000000bc614eULL,
156
+ 0xffffffffffffffffLL, 0x0000000000000000LL},
157
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
158
+ 0xffffffffffffffffLL, 0xffffffffffa1cf59ULL,
159
+ 0x0000000000000002LL, 0x0000000000000000LL},
160
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
161
+ 0x0000000000000000LL, 0x00000000005e30a7ULL,
162
+ 0xfffffffffffffffeLL, 0x0000000000000000LL},
163
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
164
+ 0xffffffffffffffffLL, 0xffffffffffe873d7ULL,
165
+ 0x0000000000000008LL, 0xfffffffffffffffaLL},
166
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
167
+ 0x0000000000000000LL, 0x0000000000178c29ULL,
168
+ 0xfffffffffffffff8LL, 0xfffffffffffffffaLL},
169
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
170
+ 0xffffffffffffffffLL, 0xffffffffffffaaf3ULL,
171
+ 0x0000000000000237LL, 0xfffffffffffffe7dLL},
172
+ { 0xffffffffffffffffLL, 0xffffffffff439eb2ULL,
173
+ 0x0000000000000000LL, 0x000000000000550dULL,
174
+ 0xfffffffffffffdc9LL, 0xfffffffffffffe7dLL},
175
+};
176
+
177
+static void test_divu128(void)
178
+{
69
+{
179
+ int i;
70
+ CPUDebug *s = container_of(info, CPUDebug, info);
180
+ uint64_t rem;
71
+ MemTxResult res;
181
+ test_data_unsigned tmp;
72
+
182
+
73
+ res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED,
183
+ for (i = 0; i < ARRAY_SIZE(test_table_unsigned); ++i) {
74
+ myaddr, length);
184
+ tmp = test_table_unsigned[i];
75
+ return res == MEMTX_OK ? 0 : EIO;
185
+
186
+ rem = divu128(&tmp.low, &tmp.high, tmp.divisor);
187
+ g_assert_cmpuint(tmp.low, ==, tmp.rlow);
188
+ g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
189
+ g_assert_cmpuint(rem, ==, tmp.remainder);
190
+ }
191
+}
76
+}
192
+
77
+
193
+static void test_divs128(void)
78
+/* Disassembler for the monitor. */
79
+void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
80
+ int nb_insn, bool is_physical)
194
+{
81
+{
195
+ int i;
82
+ int count, i;
196
+ int64_t rem;
83
+ CPUDebug s;
197
+ test_data_signed tmp;
84
+ g_autoptr(GString) ds = g_string_new("");
198
+
85
+
199
+ for (i = 0; i < ARRAY_SIZE(test_table_signed); ++i) {
86
+ disas_initialize_debug_target(&s, cpu);
200
+ tmp = test_table_signed[i];
87
+ s.info.fprintf_func = disas_gstring_printf;
201
+
88
+ s.info.stream = (FILE *)ds; /* abuse this slot */
202
+ rem = divs128(&tmp.low, &tmp.high, tmp.divisor);
89
+
203
+ g_assert_cmpuint(tmp.low, ==, tmp.rlow);
90
+ if (is_physical) {
204
+ g_assert_cmpuint(tmp.high, ==, tmp.rhigh);
91
+ s.info.read_memory_func = physical_read_memory;
205
+ g_assert_cmpuint(rem, ==, tmp.remainder);
92
+ }
206
+ }
93
+ s.info.buffer_vma = pc;
94
+
95
+ if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) {
96
+ monitor_puts(mon, ds->str);
97
+ return;
98
+ }
99
+
100
+ if (!s.info.print_insn) {
101
+ monitor_printf(mon, "0x%08" PRIx64
102
+ ": Asm output not supported on this arch\n", pc);
103
+ return;
104
+ }
105
+
106
+ for (i = 0; i < nb_insn; i++) {
107
+ g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc);
108
+ count = s.info.print_insn(pc, &s.info);
109
+ g_string_append_c(ds, '\n');
110
+ if (count < 0) {
111
+ break;
112
+ }
113
+ pc += count;
114
+ }
115
+
116
+ monitor_puts(mon, ds->str);
207
+}
117
+}
208
+
118
diff --git a/disas/disas.c b/disas/disas.c
209
+int main(int argc, char **argv)
210
+{
211
+ g_test_init(&argc, &argv, NULL);
212
+ g_test_add_func("/host-utils/test_divu128", test_divu128);
213
+ g_test_add_func("/host-utils/test_divs128", test_divs128);
214
+ return g_test_run();
215
+}
216
diff --git a/tests/unit/meson.build b/tests/unit/meson.build
217
index XXXXXXX..XXXXXXX 100644
119
index XXXXXXX..XXXXXXX 100644
218
--- a/tests/unit/meson.build
120
--- a/disas/disas.c
219
+++ b/tests/unit/meson.build
121
+++ b/disas/disas.c
220
@@ -XXX,XX +XXX,XX @@ tests = {
122
@@ -XXX,XX +XXX,XX @@
221
# all code tested by test-x86-cpuid is inside topology.h
123
/* General "disassemble this chunk" code. Used for debugging. */
222
'test-x86-cpuid': [],
124
#include "qemu/osdep.h"
223
'test-cutils': [],
125
-#include "disas/dis-asm.h"
224
+ 'test-div128': [],
126
+#include "disas/disas-internal.h"
225
'test-shift128': [],
127
#include "elf.h"
226
'test-mul64': [],
128
#include "qemu/qemu-print.h"
227
# all code tested by test-int128 is inside int128.h
129
#include "disas/disas.h"
130
@@ -XXX,XX +XXX,XX @@
131
#include "hw/core/cpu.h"
132
#include "exec/memory.h"
133
134
-typedef struct CPUDebug {
135
- struct disassemble_info info;
136
- CPUState *cpu;
137
-} CPUDebug;
138
-
139
/* Filled in by elfload.c. Simplistic, but will do for now. */
140
struct syminfo *syminfos = NULL;
141
142
@@ -XXX,XX +XXX,XX @@ static void initialize_debug(CPUDebug *s)
143
s->info.symbol_at_address_func = symbol_at_address;
144
}
145
146
-static void initialize_debug_target(CPUDebug *s, CPUState *cpu)
147
+void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu)
148
{
149
initialize_debug(s);
150
151
@@ -XXX,XX +XXX,XX @@ void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
152
int count;
153
CPUDebug s;
154
155
- initialize_debug_target(&s, cpu);
156
+ disas_initialize_debug_target(&s, cpu);
157
s.info.fprintf_func = fprintf;
158
s.info.stream = out;
159
s.info.buffer_vma = code;
160
@@ -XXX,XX +XXX,XX @@ void target_disas(FILE *out, CPUState *cpu, uint64_t code, size_t size)
161
}
162
}
163
164
-static int G_GNUC_PRINTF(2, 3)
165
-gstring_printf(FILE *stream, const char *fmt, ...)
166
+int disas_gstring_printf(FILE *stream, const char *fmt, ...)
167
{
168
/* We abuse the FILE parameter to pass a GString. */
169
GString *s = (GString *)stream;
170
@@ -XXX,XX +XXX,XX @@ char *plugin_disas(CPUState *cpu, uint64_t addr, size_t size)
171
CPUDebug s;
172
GString *ds = g_string_new(NULL);
173
174
- initialize_debug_target(&s, cpu);
175
- s.info.fprintf_func = gstring_printf;
176
+ disas_initialize_debug_target(&s, cpu);
177
+ s.info.fprintf_func = disas_gstring_printf;
178
s.info.stream = (FILE *)ds; /* abuse this slot */
179
s.info.buffer_vma = addr;
180
s.info.buffer_length = size;
181
@@ -XXX,XX +XXX,XX @@ const char *lookup_symbol(uint64_t orig_addr)
182
183
return symbol;
184
}
185
-
186
-#if !defined(CONFIG_USER_ONLY)
187
-
188
-#include "monitor/monitor.h"
189
-
190
-static int
191
-physical_read_memory(bfd_vma memaddr, bfd_byte *myaddr, int length,
192
- struct disassemble_info *info)
193
-{
194
- CPUDebug *s = container_of(info, CPUDebug, info);
195
- MemTxResult res;
196
-
197
- res = address_space_read(s->cpu->as, memaddr, MEMTXATTRS_UNSPECIFIED,
198
- myaddr, length);
199
- return res == MEMTX_OK ? 0 : EIO;
200
-}
201
-
202
-/* Disassembler for the monitor. */
203
-void monitor_disas(Monitor *mon, CPUState *cpu, uint64_t pc,
204
- int nb_insn, bool is_physical)
205
-{
206
- int count, i;
207
- CPUDebug s;
208
- g_autoptr(GString) ds = g_string_new("");
209
-
210
- initialize_debug_target(&s, cpu);
211
- s.info.fprintf_func = gstring_printf;
212
- s.info.stream = (FILE *)ds; /* abuse this slot */
213
-
214
- if (is_physical) {
215
- s.info.read_memory_func = physical_read_memory;
216
- }
217
- s.info.buffer_vma = pc;
218
-
219
- if (s.info.cap_arch >= 0 && cap_disas_monitor(&s.info, pc, nb_insn)) {
220
- monitor_puts(mon, ds->str);
221
- return;
222
- }
223
-
224
- if (!s.info.print_insn) {
225
- monitor_printf(mon, "0x%08" PRIx64
226
- ": Asm output not supported on this arch\n", pc);
227
- return;
228
- }
229
-
230
- for (i = 0; i < nb_insn; i++) {
231
- g_string_append_printf(ds, "0x%08" PRIx64 ": ", pc);
232
- count = s.info.print_insn(pc, &s.info);
233
- g_string_append_c(ds, '\n');
234
- if (count < 0) {
235
- break;
236
- }
237
- pc += count;
238
- }
239
-
240
- monitor_puts(mon, ds->str);
241
-}
242
-#endif
243
diff --git a/disas/meson.build b/disas/meson.build
244
index XXXXXXX..XXXXXXX 100644
245
--- a/disas/meson.build
246
+++ b/disas/meson.build
247
@@ -XXX,XX +XXX,XX @@ common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
248
common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
249
common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
250
251
+softmmu_ss.add(files('disas-mon.c'))
252
specific_ss.add(files('disas.c'), capstone)
228
--
253
--
229
2.25.1
254
2.34.1
230
231
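As a rough cross-check of what the test-div128 rows above encode ({high, low, rhigh, rlow, divisor, remainder}), the following standalone sketch verifies one test_table_unsigned entry using the compiler-provided unsigned __int128 type rather than QEMU's divu128(); it only assumes a GCC/Clang 64-bit host and is not part of the patch.

/* Check one row: 0x0000000000000000_a000000000000000 / 0x4000000000000000
 * should give quotient 2 and remainder 0x2000000000000000, matching what
 * the unit test expects back from divu128(). */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t high = 0x0000000000000000ULL;
    uint64_t low = 0xa000000000000000ULL;
    uint64_t divisor = 0x4000000000000000ULL;

    unsigned __int128 dividend = ((unsigned __int128)high << 64) | low;
    unsigned __int128 quotient = dividend / divisor;
    uint64_t remainder = (uint64_t)(dividend % divisor);

    assert((uint64_t)quotient == 0x0000000000000002ULL);         /* rlow */
    assert((uint64_t)(quotient >> 64) == 0x0000000000000000ULL); /* rhigh */
    assert(remainder == 0x2000000000000000ULL);
    printf("quotient=%#llx remainder=%#llx\n",
           (unsigned long long)quotient, (unsigned long long)remainder);
    return 0;
}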

1
Rename to fold_multiply2, and handle muls2_i32, mulu2_i64,
1
From: Thomas Huth <thuth@redhat.com>
2
and muls2_i64.
3
2
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
By using target_words_bigendian() instead of an ifdef,
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
we can build this code once.
5
6
Signed-off-by: Thomas Huth <thuth@redhat.com>
7
Message-Id: <20230508133745.109463-3-thuth@redhat.com>
8
[rth: Type change done in a separate patch]
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
10
---
8
tcg/optimize.c | 44 +++++++++++++++++++++++++++++++++++---------
11
disas/disas.c | 10 +++++-----
9
1 file changed, 35 insertions(+), 9 deletions(-)
12
disas/meson.build | 3 ++-
13
2 files changed, 7 insertions(+), 6 deletions(-)
10
14
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
diff --git a/disas/disas.c b/disas/disas.c
12
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
17
--- a/disas/disas.c
14
+++ b/tcg/optimize.c
18
+++ b/disas/disas.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
19
@@ -XXX,XX +XXX,XX @@ void disas_initialize_debug_target(CPUDebug *s, CPUState *cpu)
16
return false;
20
s->cpu = cpu;
17
}
21
s->info.read_memory_func = target_read_memory;
18
22
s->info.print_address_func = print_address;
19
-static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
23
-#if TARGET_BIG_ENDIAN
20
+static bool fold_multiply2(OptContext *ctx, TCGOp *op)
24
- s->info.endian = BFD_ENDIAN_BIG;
21
{
25
-#else
22
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
26
- s->info.endian = BFD_ENDIAN_LITTLE;
23
- uint32_t a = arg_info(op->args[2])->val;
27
-#endif
24
- uint32_t b = arg_info(op->args[3])->val;
28
+ if (target_words_bigendian()) {
25
- uint64_t r = (uint64_t)a * b;
29
+ s->info.endian = BFD_ENDIAN_BIG;
26
+ uint64_t a = arg_info(op->args[2])->val;
30
+ } else {
27
+ uint64_t b = arg_info(op->args[3])->val;
31
+ s->info.endian = BFD_ENDIAN_LITTLE;
28
+ uint64_t h, l;
32
+ }
29
TCGArg rl, rh;
33
30
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
34
CPUClass *cc = CPU_GET_CLASS(cpu);
31
+ TCGOp *op2;
35
if (cc->disas_set_info) {
32
+
36
diff --git a/disas/meson.build b/disas/meson.build
33
+ switch (op->opc) {
37
index XXXXXXX..XXXXXXX 100644
34
+ case INDEX_op_mulu2_i32:
38
--- a/disas/meson.build
35
+ l = (uint64_t)(uint32_t)a * (uint32_t)b;
39
+++ b/disas/meson.build
36
+ h = (int32_t)(l >> 32);
40
@@ -XXX,XX +XXX,XX @@ common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
37
+ l = (int32_t)l;
41
common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
38
+ break;
42
common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
39
+ case INDEX_op_muls2_i32:
43
common_ss.add(when: capstone, if_true: [files('capstone.c'), capstone])
40
+ l = (int64_t)(int32_t)a * (int32_t)b;
44
+common_ss.add(files('disas.c'))
41
+ h = l >> 32;
45
42
+ l = (int32_t)l;
46
softmmu_ss.add(files('disas-mon.c'))
43
+ break;
47
-specific_ss.add(files('disas.c'), capstone)
44
+ case INDEX_op_mulu2_i64:
48
+specific_ss.add(capstone)
45
+ mulu64(&l, &h, a, b);
46
+ break;
47
+ case INDEX_op_muls2_i64:
48
+ muls64(&l, &h, a, b);
49
+ break;
50
+ default:
51
+ g_assert_not_reached();
52
+ }
53
54
rl = op->args[0];
55
rh = op->args[1];
56
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
57
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
58
+
59
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
60
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
61
+
62
+ tcg_opt_gen_movi(ctx, op, rl, l);
63
+ tcg_opt_gen_movi(ctx, op2, rh, h);
64
return true;
65
}
66
return false;
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
CASE_OP_32_64(muluh):
69
done = fold_mul_highpart(&ctx, op);
70
break;
71
- case INDEX_op_mulu2_i32:
72
- done = fold_mulu2_i32(&ctx, op);
73
+ CASE_OP_32_64(muls2):
74
+ CASE_OP_32_64(mulu2):
75
+ done = fold_multiply2(&ctx, op);
76
break;
77
CASE_OP_32_64(nand):
78
done = fold_nand(&ctx, op);
79
--
49
--
80
2.25.1
50
2.34.1
81
82
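The fold_multiply2 change above folds the double-output multiply opcodes when both inputs are constant. As a minimal sketch of the arithmetic only (not the TCG opcode handling), this is how the unsigned 32-bit case splits the 64-bit product into the low and high outputs:

/* mulu2_i32 folding, arithmetic only: widen, multiply, split. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t a = 0xffffffffu, b = 0xffffffffu;

    uint64_t prod = (uint64_t)a * b;        /* 0xfffffffe00000001 */
    uint32_t lo = (uint32_t)prod;           /* first output register */
    uint32_t hi = (uint32_t)(prod >> 32);   /* second output register */

    assert(lo == 0x00000001u);
    assert(hi == 0xfffffffeu);
    return 0;
}

The signed 32-bit variant differs only in widening with (int64_t)(int32_t) casts, and the 64-bit cases delegate to mulu64()/muls64(), as the diff shows.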
1
Rename to fold_addsub2.
1
From: Jamie Iles <quic_jiles@quicinc.com>
2
Use Int128 to implement the wider operation.
3
2
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Expose qemu_cpu_list_lock globally so that we can use
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
WITH_QEMU_LOCK_GUARD and QEMU_LOCK_GUARD to simplify a few code paths
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
now and in the future.
6
7
Signed-off-by: Jamie Iles <quic_jiles@quicinc.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
10
Message-Id: <20230427020925.51003-2-quic_jiles@quicinc.com>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
12
---
9
tcg/optimize.c | 65 ++++++++++++++++++++++++++++++++++----------------
13
include/exec/cpu-common.h | 1 +
10
1 file changed, 44 insertions(+), 21 deletions(-)
14
cpus-common.c | 2 +-
15
linux-user/elfload.c | 13 +++++++------
16
migration/dirtyrate.c | 26 +++++++++++++-------------
17
trace/control-target.c | 9 ++++-----
18
5 files changed, 26 insertions(+), 25 deletions(-)
11
19
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
20
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
13
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
22
--- a/include/exec/cpu-common.h
15
+++ b/tcg/optimize.c
23
+++ b/include/exec/cpu-common.h
24
@@ -XXX,XX +XXX,XX @@ extern intptr_t qemu_host_page_mask;
25
#define REAL_HOST_PAGE_ALIGN(addr) ROUND_UP((addr), qemu_real_host_page_size())
26
27
/* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */
28
+extern QemuMutex qemu_cpu_list_lock;
29
void qemu_init_cpu_list(void);
30
void cpu_list_lock(void);
31
void cpu_list_unlock(void);
32
diff --git a/cpus-common.c b/cpus-common.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/cpus-common.c
35
+++ b/cpus-common.c
36
@@ -XXX,XX +XXX,XX @@
37
#include "qemu/lockable.h"
38
#include "trace/trace-root.h"
39
40
-static QemuMutex qemu_cpu_list_lock;
41
+QemuMutex qemu_cpu_list_lock;
42
static QemuCond exclusive_cond;
43
static QemuCond exclusive_resume;
44
static QemuCond qemu_work_cond;
45
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
46
index XXXXXXX..XXXXXXX 100644
47
--- a/linux-user/elfload.c
48
+++ b/linux-user/elfload.c
49
@@ -XXX,XX +XXX,XX @@
50
#include "qemu/guest-random.h"
51
#include "qemu/units.h"
52
#include "qemu/selfmap.h"
53
+#include "qemu/lockable.h"
54
#include "qapi/error.h"
55
#include "qemu/error-report.h"
56
#include "target_signal.h"
57
@@ -XXX,XX +XXX,XX @@ static int fill_note_info(struct elf_note_info *info,
58
info->notes_size += note_size(&info->notes[i]);
59
60
/* read and fill status of all threads */
61
- cpu_list_lock();
62
- CPU_FOREACH(cpu) {
63
- if (cpu == thread_cpu) {
64
- continue;
65
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
66
+ CPU_FOREACH(cpu) {
67
+ if (cpu == thread_cpu) {
68
+ continue;
69
+ }
70
+ fill_thread_info(info, cpu->env_ptr);
71
}
72
- fill_thread_info(info, cpu->env_ptr);
73
}
74
- cpu_list_unlock();
75
76
return (0);
77
}
78
diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
79
index XXXXXXX..XXXXXXX 100644
80
--- a/migration/dirtyrate.c
81
+++ b/migration/dirtyrate.c
82
@@ -XXX,XX +XXX,XX @@ int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
83
retry:
84
init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
85
86
- cpu_list_lock();
87
- gen_id = cpu_list_generation_id_get();
88
- records = vcpu_dirty_stat_alloc(stat);
89
- vcpu_dirty_stat_collect(stat, records, true);
90
- cpu_list_unlock();
91
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
92
+ gen_id = cpu_list_generation_id_get();
93
+ records = vcpu_dirty_stat_alloc(stat);
94
+ vcpu_dirty_stat_collect(stat, records, true);
95
+ }
96
97
duration = dirty_stat_wait(calc_time_ms, init_time_ms);
98
99
global_dirty_log_sync(flag, one_shot);
100
101
- cpu_list_lock();
102
- if (gen_id != cpu_list_generation_id_get()) {
103
- g_free(records);
104
- g_free(stat->rates);
105
- cpu_list_unlock();
106
- goto retry;
107
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
108
+ if (gen_id != cpu_list_generation_id_get()) {
109
+ g_free(records);
110
+ g_free(stat->rates);
111
+ cpu_list_unlock();
112
+ goto retry;
113
+ }
114
+ vcpu_dirty_stat_collect(stat, records, false);
115
}
116
- vcpu_dirty_stat_collect(stat, records, false);
117
- cpu_list_unlock();
118
119
for (i = 0; i < stat->nvcpu; i++) {
120
dirtyrate = do_calculate_dirtyrate(records[i], duration);
121
diff --git a/trace/control-target.c b/trace/control-target.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/trace/control-target.c
124
+++ b/trace/control-target.c
16
@@ -XXX,XX +XXX,XX @@
125
@@ -XXX,XX +XXX,XX @@
17
*/
126
*/
18
127
19
#include "qemu/osdep.h"
128
#include "qemu/osdep.h"
20
+#include "qemu/int128.h"
129
+#include "qemu/lockable.h"
21
#include "tcg/tcg-op.h"
130
#include "cpu.h"
22
#include "tcg-internal.h"
131
#include "trace/trace-root.h"
23
132
#include "trace/control.h"
24
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
133
@@ -XXX,XX +XXX,XX @@ static bool adding_first_cpu1(void)
25
return false;
134
135
static bool adding_first_cpu(void)
136
{
137
- bool res;
138
- cpu_list_lock();
139
- res = adding_first_cpu1();
140
- cpu_list_unlock();
141
- return res;
142
+ QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
143
+
144
+ return adding_first_cpu1();
26
}
145
}
27
146
28
-static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
147
void trace_init_vcpu(CPUState *vcpu)
29
+static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
30
{
31
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
32
arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
33
- uint32_t al = arg_info(op->args[2])->val;
34
- uint32_t ah = arg_info(op->args[3])->val;
35
- uint32_t bl = arg_info(op->args[4])->val;
36
- uint32_t bh = arg_info(op->args[5])->val;
37
- uint64_t a = ((uint64_t)ah << 32) | al;
38
- uint64_t b = ((uint64_t)bh << 32) | bl;
39
+ uint64_t al = arg_info(op->args[2])->val;
40
+ uint64_t ah = arg_info(op->args[3])->val;
41
+ uint64_t bl = arg_info(op->args[4])->val;
42
+ uint64_t bh = arg_info(op->args[5])->val;
43
TCGArg rl, rh;
44
- TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
45
+ TCGOp *op2;
46
47
- if (add) {
48
- a += b;
49
+ if (ctx->type == TCG_TYPE_I32) {
50
+ uint64_t a = deposit64(al, 32, 32, ah);
51
+ uint64_t b = deposit64(bl, 32, 32, bh);
52
+
53
+ if (add) {
54
+ a += b;
55
+ } else {
56
+ a -= b;
57
+ }
58
+
59
+ al = sextract64(a, 0, 32);
60
+ ah = sextract64(a, 32, 32);
61
} else {
62
- a -= b;
63
+ Int128 a = int128_make128(al, ah);
64
+ Int128 b = int128_make128(bl, bh);
65
+
66
+ if (add) {
67
+ a = int128_add(a, b);
68
+ } else {
69
+ a = int128_sub(a, b);
70
+ }
71
+
72
+ al = int128_getlo(a);
73
+ ah = int128_gethi(a);
74
}
75
76
rl = op->args[0];
77
rh = op->args[1];
78
- tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
79
- tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
80
+
81
+ /* The proper opcode is supplied by tcg_opt_gen_mov. */
82
+ op2 = tcg_op_insert_before(ctx->tcg, op, 0);
83
+
84
+ tcg_opt_gen_movi(ctx, op, rl, al);
85
+ tcg_opt_gen_movi(ctx, op2, rh, ah);
86
return true;
87
}
88
return false;
89
}
90
91
-static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
92
+static bool fold_add2(OptContext *ctx, TCGOp *op)
93
{
94
- return fold_addsub2_i32(ctx, op, true);
95
+ return fold_addsub2(ctx, op, true);
96
}
97
98
static bool fold_and(OptContext *ctx, TCGOp *op)
99
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
100
return false;
101
}
102
103
-static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
104
+static bool fold_sub2(OptContext *ctx, TCGOp *op)
105
{
106
- return fold_addsub2_i32(ctx, op, false);
107
+ return fold_addsub2(ctx, op, false);
108
}
109
110
static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
CASE_OP_32_64_VEC(add):
113
done = fold_add(&ctx, op);
114
break;
115
- case INDEX_op_add2_i32:
116
- done = fold_add2_i32(&ctx, op);
117
+ CASE_OP_32_64(add2):
118
+ done = fold_add2(&ctx, op);
119
break;
120
CASE_OP_32_64_VEC(and):
121
done = fold_and(&ctx, op);
122
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
123
CASE_OP_32_64_VEC(sub):
124
done = fold_sub(&ctx, op);
125
break;
126
- case INDEX_op_sub2_i32:
127
- done = fold_sub2_i32(&ctx, op);
128
+ CASE_OP_32_64(sub2):
129
+ done = fold_sub2(&ctx, op);
130
break;
131
CASE_OP_32_64_VEC(xor):
132
done = fold_xor(&ctx, op);
133
--
148
--
134
2.25.1
149
2.34.1
135
150
136
151
1
From: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
1
From: Jamie Iles <quic_jiles@quicinc.com>
2
2
3
Addition of not and xor on 128-bit integers.
3
The round-robin scheduler will iterate over the CPU list with an
4
4
assigned budget until the next timer expiry and may exit early because
5
Signed-off-by: Frédéric Pétrot <frederic.petrot@univ-grenoble-alpes.fr>
5
of a TB exit. This is fine under normal operation but with icount
6
Co-authored-by: Fabien Portas <fabien.portas@grenoble-inp.org>
6
enabled and SMP it is possible for a CPU to be starved of run time and
7
Message-Id: <20211025122818.168890-3-frederic.petrot@univ-grenoble-alpes.fr>
7
the system live-locks.
8
[rth: Split out logical operations.]
8
9
For example, booting a riscv64 platform with '-icount
10
shift=0,align=off,sleep=on -smp 2' we observe a livelock once the kernel
11
has timers enabled and starts performing TLB shootdowns. In this case
12
we have CPU 0 in M-mode with interrupts disabled sending an IPI to CPU
13
1. As we enter the TCG loop, we assign the icount budget to next timer
14
interrupt to CPU 0 and begin executing where the guest is sat in a busy
15
loop exhausting all of the budget before we try to execute CPU 1 which
16
is the target of the IPI but CPU 1 is left with no budget with which to
17
execute and the process repeats.
18
19
We try here to add some fairness by splitting the budget across all of
20
the CPUs on the thread fairly before entering each one. The CPU count
21
is cached on CPU list generation ID to avoid iterating the list on each
22
loop iteration. With this change it is possible to boot an SMP rv64
23
guest with icount enabled and no hangs.
24
25
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
26
Tested-by: Peter Maydell <peter.maydell@linaro.org>
27
Signed-off-by: Jamie Iles <quic_jiles@quicinc.com>
9
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
28
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
29
Message-Id: <20230427020925.51003-3-quic_jiles@quicinc.com>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
30
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
31
---
12
include/qemu/int128.h | 20 ++++++++++++++++++++
32
accel/tcg/tcg-accel-ops-icount.h | 3 ++-
13
1 file changed, 20 insertions(+)
33
accel/tcg/tcg-accel-ops-icount.c | 21 ++++++++++++++----
14
34
accel/tcg/tcg-accel-ops-rr.c | 37 +++++++++++++++++++++++++++++++-
15
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
35
replay/replay.c | 3 +--
16
index XXXXXXX..XXXXXXX 100644
36
4 files changed, 56 insertions(+), 8 deletions(-)
17
--- a/include/qemu/int128.h
37
18
+++ b/include/qemu/int128.h
38
diff --git a/accel/tcg/tcg-accel-ops-icount.h b/accel/tcg/tcg-accel-ops-icount.h
19
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
39
index XXXXXXX..XXXXXXX 100644
20
return a;
40
--- a/accel/tcg/tcg-accel-ops-icount.h
41
+++ b/accel/tcg/tcg-accel-ops-icount.h
42
@@ -XXX,XX +XXX,XX @@
43
#define TCG_ACCEL_OPS_ICOUNT_H
44
45
void icount_handle_deadline(void);
46
-void icount_prepare_for_run(CPUState *cpu);
47
+void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget);
48
+int64_t icount_percpu_budget(int cpu_count);
49
void icount_process_data(CPUState *cpu);
50
51
void icount_handle_interrupt(CPUState *cpu, int mask);
52
diff --git a/accel/tcg/tcg-accel-ops-icount.c b/accel/tcg/tcg-accel-ops-icount.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/accel/tcg/tcg-accel-ops-icount.c
55
+++ b/accel/tcg/tcg-accel-ops-icount.c
56
@@ -XXX,XX +XXX,XX @@ void icount_handle_deadline(void)
57
}
21
}
58
}
22
59
23
+static inline Int128 int128_not(Int128 a)
60
-void icount_prepare_for_run(CPUState *cpu)
61
+/* Distribute the budget evenly across all CPUs */
62
+int64_t icount_percpu_budget(int cpu_count)
24
+{
63
+{
25
+ return ~a;
64
+ int64_t limit = icount_get_limit();
65
+ int64_t timeslice = limit / cpu_count;
66
+
67
+ if (timeslice == 0) {
68
+ timeslice = limit;
69
+ }
70
+
71
+ return timeslice;
26
+}
72
+}
27
+
73
+
28
static inline Int128 int128_and(Int128 a, Int128 b)
74
+void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget)
29
{
75
{
30
return a & b;
76
int insns_left;
31
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
77
32
return a | b;
78
@@ -XXX,XX +XXX,XX @@ void icount_prepare_for_run(CPUState *cpu)
79
g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
80
g_assert(cpu->icount_extra == 0);
81
82
- cpu->icount_budget = icount_get_limit();
83
+ replay_mutex_lock();
84
+
85
+ cpu->icount_budget = MIN(icount_get_limit(), cpu_budget);
86
insns_left = MIN(0xffff, cpu->icount_budget);
87
cpu_neg(cpu)->icount_decr.u16.low = insns_left;
88
cpu->icount_extra = cpu->icount_budget - insns_left;
89
90
- replay_mutex_lock();
91
-
92
if (cpu->icount_budget == 0) {
93
/*
94
* We're called without the iothread lock, so must take it while
95
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
96
index XXXXXXX..XXXXXXX 100644
97
--- a/accel/tcg/tcg-accel-ops-rr.c
98
+++ b/accel/tcg/tcg-accel-ops-rr.c
99
@@ -XXX,XX +XXX,XX @@
100
*/
101
102
#include "qemu/osdep.h"
103
+#include "qemu/lockable.h"
104
#include "sysemu/tcg.h"
105
#include "sysemu/replay.h"
106
#include "sysemu/cpu-timers.h"
107
@@ -XXX,XX +XXX,XX @@ static void rr_force_rcu(Notifier *notify, void *data)
108
rr_kick_next_cpu();
33
}
109
}
34
110
35
+static inline Int128 int128_xor(Int128 a, Int128 b)
111
+/*
112
+ * Calculate the number of CPUs that we will process in a single iteration of
113
+ * the main CPU thread loop so that we can fairly distribute the instruction
114
+ * count across CPUs.
115
+ *
116
+ * The CPU count is cached based on the CPU list generation ID to avoid
117
+ * iterating the list every time.
118
+ */
119
+static int rr_cpu_count(void)
36
+{
120
+{
37
+ return a ^ b;
121
+ static unsigned int last_gen_id = ~0;
122
+ static int cpu_count;
123
+ CPUState *cpu;
124
+
125
+ QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
126
+
127
+ if (cpu_list_generation_id_get() != last_gen_id) {
128
+ cpu_count = 0;
129
+ CPU_FOREACH(cpu) {
130
+ ++cpu_count;
131
+ }
132
+ last_gen_id = cpu_list_generation_id_get();
133
+ }
134
+
135
+ return cpu_count;
38
+}
136
+}
39
+
137
+
40
static inline Int128 int128_rshift(Int128 a, int n)
138
/*
139
* In the single-threaded case each vCPU is simulated in turn. If
140
* there is more than a single vCPU we create a simple timer to kick
141
@@ -XXX,XX +XXX,XX @@ static void *rr_cpu_thread_fn(void *arg)
142
cpu->exit_request = 1;
143
144
while (1) {
145
+ /* Only used for icount_enabled() */
146
+ int64_t cpu_budget = 0;
147
+
148
qemu_mutex_unlock_iothread();
149
replay_mutex_lock();
150
qemu_mutex_lock_iothread();
151
152
if (icount_enabled()) {
153
+ int cpu_count = rr_cpu_count();
154
+
155
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
156
icount_account_warp_timer();
157
/*
158
@@ -XXX,XX +XXX,XX @@ static void *rr_cpu_thread_fn(void *arg)
159
* waking up the I/O thread and waiting for completion.
160
*/
161
icount_handle_deadline();
162
+
163
+ cpu_budget = icount_percpu_budget(cpu_count);
164
}
165
166
replay_mutex_unlock();
167
@@ -XXX,XX +XXX,XX @@ static void *rr_cpu_thread_fn(void *arg)
168
169
qemu_mutex_unlock_iothread();
170
if (icount_enabled()) {
171
- icount_prepare_for_run(cpu);
172
+ icount_prepare_for_run(cpu, cpu_budget);
173
}
174
r = tcg_cpus_exec(cpu);
175
if (icount_enabled()) {
176
diff --git a/replay/replay.c b/replay/replay.c
177
index XXXXXXX..XXXXXXX 100644
178
--- a/replay/replay.c
179
+++ b/replay/replay.c
180
@@ -XXX,XX +XXX,XX @@ uint64_t replay_get_current_icount(void)
181
int replay_get_instructions(void)
41
{
182
{
42
return a >> n;
183
int res = 0;
43
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_exts64(int64_t a)
184
- replay_mutex_lock();
44
return int128_make128(a, (a < 0) ? -1 : 0);
185
+ g_assert(replay_mutex_locked());
186
if (replay_next_event_is(EVENT_INSTRUCTION)) {
187
res = replay_state.instruction_count;
188
if (replay_break_icount != -1LL) {
189
@@ -XXX,XX +XXX,XX @@ int replay_get_instructions(void)
190
}
191
}
192
}
193
- replay_mutex_unlock();
194
return res;
45
}
195
}
46
196
47
+static inline Int128 int128_not(Int128 a)
48
+{
49
+ return int128_make128(~a.lo, ~a.hi);
50
+}
51
+
52
static inline Int128 int128_and(Int128 a, Int128 b)
53
{
54
return int128_make128(a.lo & b.lo, a.hi & b.hi);
55
@@ -XXX,XX +XXX,XX @@ static inline Int128 int128_or(Int128 a, Int128 b)
56
return int128_make128(a.lo | b.lo, a.hi | b.hi);
57
}
58
59
+static inline Int128 int128_xor(Int128 a, Int128 b)
60
+{
61
+ return int128_make128(a.lo ^ b.lo, a.hi ^ b.hi);
62
+}
63
+
64
static inline Int128 int128_rshift(Int128 a, int n)
65
{
66
int64_t h;
67
--
197
--
68
2.25.1
198
2.34.1
69
199
70
200
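The icount fairness patch above boils down to a small budget split before each pass over the CPU list. Here is a minimal sketch of that arithmetic only; percpu_budget() is a local stand-in for illustration, while the real icount_percpu_budget() takes the limit from icount_get_limit(), as the diff shows.

/* Per-vCPU budget: divide evenly, but never hand out a zero timeslice. */
#include <assert.h>
#include <stdint.h>

static int64_t percpu_budget(int64_t limit, int cpu_count)
{
    int64_t timeslice = limit / cpu_count;
    return timeslice == 0 ? limit : timeslice;
}

int main(void)
{
    assert(percpu_budget(1000000, 2) == 500000); /* even split across 2 CPUs */
    assert(percpu_budget(3, 8) == 3);            /* fewer insns than CPUs */
    return 0;
}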
1
From: Luis Pires <luis.pires@eldorado.org.br>
1
Merge tcg_out_tlb_load, add_qemu_ldst_label,
2
tcg_out_test_alignment, and some code that lived in both
3
tcg_out_qemu_ld and tcg_out_qemu_st into one function
4
that returns HostAddress and TCGLabelQemuLdst structures.
2
5
3
Move udiv_qrnnd() from include/fpu/softfloat-macros.h to host-utils,
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
so it can be reused by divu128().
5
6
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
7
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
8
Message-Id: <20211025191154.350831-3-luis.pires@eldorado.org.br>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
8
---
11
include/fpu/softfloat-macros.h | 82 ----------------------------------
9
tcg/i386/tcg-target.c.inc | 346 ++++++++++++++++----------------------
12
include/qemu/host-utils.h | 81 +++++++++++++++++++++++++++++++++
10
1 file changed, 145 insertions(+), 201 deletions(-)
13
2 files changed, 81 insertions(+), 82 deletions(-)
14
11
15
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
12
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
16
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
17
--- a/include/fpu/softfloat-macros.h
14
--- a/tcg/i386/tcg-target.c.inc
18
+++ b/include/fpu/softfloat-macros.h
15
+++ b/tcg/i386/tcg-target.c.inc
19
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
20
* so some portions are provided under:
17
[MO_BEUQ] = helper_be_stq_mmu,
21
* the SoftFloat-2a license
18
};
22
* the BSD license
19
23
- * GPL-v2-or-later
20
-/* Perform the TLB load and compare.
24
*
21
-
25
* Any future contributions to this file after December 1st 2014 will be
22
- Inputs:
26
* taken to be licensed under the Softfloat-2a license unless specifically
23
- ADDRLO and ADDRHI contain the low and high part of the address.
27
@@ -XXX,XX +XXX,XX @@ this code that are retained.
24
-
28
* THE POSSIBILITY OF SUCH DAMAGE.
25
- MEM_INDEX and S_BITS are the memory context and log2 size of the load.
29
*/
26
-
30
27
- WHICH is the offset into the CPUTLBEntry structure of the slot to read.
31
-/* Portions of this work are licensed under the terms of the GNU GPL,
28
- This should be offsetof addr_read or addr_write.
32
- * version 2 or later. See the COPYING file in the top-level directory.
29
-
33
- */
30
- Outputs:
34
-
31
- LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
35
#ifndef FPU_SOFTFLOAT_MACROS_H
32
- positions of the displacements of forward jumps to the TLB miss case.
36
#define FPU_SOFTFLOAT_MACROS_H
33
-
37
34
- Second argument register is loaded with the low part of the address.
38
@@ -XXX,XX +XXX,XX @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
35
- In the TLB hit case, it has been adjusted as indicated by the TLB
39
36
- and so is a host address. In the TLB miss case, it continues to
40
}
37
- hold a guest address.
41
38
-
42
-/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
39
- First argument register is clobbered. */
43
- * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
40
-
44
- *
41
-static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
45
- * Licensed under the GPLv2/LGPLv3
42
- int mem_index, MemOp opc,
46
- */
43
- tcg_insn_unit **label_ptr, int which)
47
-static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
48
- uint64_t n0, uint64_t d)
49
-{
44
-{
50
-#if defined(__x86_64__)
45
- TCGType ttype = TCG_TYPE_I32;
51
- uint64_t q;
46
- TCGType tlbtype = TCG_TYPE_I32;
52
- asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
47
- int trexw = 0, hrexw = 0, tlbrexw = 0;
53
- return q;
48
- unsigned a_bits = get_alignment_bits(opc);
54
-#elif defined(__s390x__) && !defined(__clang__)
49
- unsigned s_bits = opc & MO_SIZE;
55
- /* Need to use a TImode type to get an even register pair for DLGR. */
50
- unsigned a_mask = (1 << a_bits) - 1;
56
- unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
51
- unsigned s_mask = (1 << s_bits) - 1;
57
- asm("dlgr %0, %1" : "+r"(n) : "r"(d));
52
- target_ulong tlb_mask;
58
- *r = n >> 64;
53
-
59
- return n;
54
- if (TCG_TARGET_REG_BITS == 64) {
60
-#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
55
- if (TARGET_LONG_BITS == 64) {
61
- /* From Power ISA 2.06, programming note for divdeu. */
56
- ttype = TCG_TYPE_I64;
62
- uint64_t q1, q2, Q, r1, r2, R;
57
- trexw = P_REXW;
63
- asm("divdeu %0,%2,%4; divdu %1,%3,%4"
58
- }
64
- : "=&r"(q1), "=r"(q2)
59
- if (TCG_TYPE_PTR == TCG_TYPE_I64) {
65
- : "r"(n1), "r"(n0), "r"(d));
60
- hrexw = P_REXW;
66
- r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
61
- if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
67
- r2 = n0 - (q2 * d);
62
- tlbtype = TCG_TYPE_I64;
68
- Q = q1 + q2;
63
- tlbrexw = P_REXW;
69
- R = r1 + r2;
70
- if (R >= d || R < r2) { /* overflow implies R > d */
71
- Q += 1;
72
- R -= d;
73
- }
74
- *r = R;
75
- return Q;
76
-#else
77
- uint64_t d0, d1, q0, q1, r1, r0, m;
78
-
79
- d0 = (uint32_t)d;
80
- d1 = d >> 32;
81
-
82
- r1 = n1 % d1;
83
- q1 = n1 / d1;
84
- m = q1 * d0;
85
- r1 = (r1 << 32) | (n0 >> 32);
86
- if (r1 < m) {
87
- q1 -= 1;
88
- r1 += d;
89
- if (r1 >= d) {
90
- if (r1 < m) {
91
- q1 -= 1;
92
- r1 += d;
93
- }
64
- }
94
- }
65
- }
95
- }
66
- }
96
- r1 -= m;
67
-
97
-
68
- tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
98
- r0 = r1 % d1;
69
- tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
99
- q0 = r1 / d1;
70
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
100
- m = q0 * d0;
71
-
101
- r0 = (r0 << 32) | (uint32_t)n0;
72
- tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
102
- if (r0 < m) {
73
- TLB_MASK_TABLE_OFS(mem_index) +
103
- q0 -= 1;
74
- offsetof(CPUTLBDescFast, mask));
104
- r0 += d;
75
-
105
- if (r0 >= d) {
76
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
106
- if (r0 < m) {
77
- TLB_MASK_TABLE_OFS(mem_index) +
107
- q0 -= 1;
78
- offsetof(CPUTLBDescFast, table));
108
- r0 += d;
79
-
109
- }
80
- /* If the required alignment is at least as large as the access, simply
110
- }
81
- copy the address and mask. For lesser alignments, check that we don't
82
- cross pages for the complete access. */
83
- if (a_bits >= s_bits) {
84
- tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
85
- } else {
86
- tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
87
- addrlo, s_mask - a_mask);
111
- }
88
- }
112
- r0 -= m;
89
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
113
-
90
- tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
114
- *r = r0;
91
-
115
- return (q1 << 32) | q0;
92
- /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
116
-#endif
93
- tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
94
- TCG_REG_L1, TCG_REG_L0, which);
95
-
96
- /* Prepare for both the fast path add of the tlb addend, and the slow
97
- path function argument setup. */
98
- tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
99
-
100
- /* jne slow_path */
101
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
102
- label_ptr[0] = s->code_ptr;
103
- s->code_ptr += 4;
104
-
105
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
106
- /* cmp 4(TCG_REG_L0), addrhi */
107
- tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, which + 4);
108
-
109
- /* jne slow_path */
110
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
111
- label_ptr[1] = s->code_ptr;
112
- s->code_ptr += 4;
113
- }
114
-
115
- /* TLB Hit. */
116
-
117
- /* add addend(TCG_REG_L0), TCG_REG_L1 */
118
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L1, TCG_REG_L0,
119
- offsetof(CPUTLBEntry, addend));
117
-}
120
-}
118
-
121
-
119
/*----------------------------------------------------------------------------
122
-/*
120
| Returns an approximation to the square root of the 32-bit significand given
123
- * Record the context of a call to the out of line helper code for the slow path
121
| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
124
- * for a load or store, so that we can later generate the correct helper code
122
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
125
- */
123
index XXXXXXX..XXXXXXX 100644
126
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
124
--- a/include/qemu/host-utils.h
127
- TCGType type, MemOpIdx oi,
125
+++ b/include/qemu/host-utils.h
128
- TCGReg datalo, TCGReg datahi,
126
@@ -XXX,XX +XXX,XX @@
129
- TCGReg addrlo, TCGReg addrhi,
127
* THE SOFTWARE.
130
- tcg_insn_unit *raddr,
131
- tcg_insn_unit **label_ptr)
132
-{
133
- TCGLabelQemuLdst *label = new_ldst_label(s);
134
-
135
- label->is_ld = is_ld;
136
- label->oi = oi;
137
- label->type = type;
138
- label->datalo_reg = datalo;
139
- label->datahi_reg = datahi;
140
- label->addrlo_reg = addrlo;
141
- label->addrhi_reg = addrhi;
142
- label->raddr = tcg_splitwx_to_rx(raddr);
143
- label->label_ptr[0] = label_ptr[0];
144
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
145
- label->label_ptr[1] = label_ptr[1];
146
- }
147
-}
148
-
149
/*
150
* Generate code for the slow path for a load at the end of block
128
*/
151
*/
129
152
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
130
+/* Portions of this work are licensed under the terms of the GNU GPL,
153
return true;
131
+ * version 2 or later. See the COPYING file in the top-level directory.
154
}
155
#else
156
-
157
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
158
- TCGReg addrhi, unsigned a_bits)
159
-{
160
- unsigned a_mask = (1 << a_bits) - 1;
161
- TCGLabelQemuLdst *label;
162
-
163
- tcg_out_testi(s, addrlo, a_mask);
164
- /* jne slow_path */
165
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
166
-
167
- label = new_ldst_label(s);
168
- label->is_ld = is_ld;
169
- label->addrlo_reg = addrlo;
170
- label->addrhi_reg = addrhi;
171
- label->raddr = tcg_splitwx_to_rx(s->code_ptr + 4);
172
- label->label_ptr[0] = s->code_ptr;
173
-
174
- s->code_ptr += 4;
175
-}
176
-
177
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
178
{
179
/* resolve label address */
180
@@ -XXX,XX +XXX,XX @@ static inline int setup_guest_base_seg(void)
181
#endif /* setup_guest_base_seg */
182
#endif /* SOFTMMU */
183
184
+/*
185
+ * For softmmu, perform the TLB load and compare.
186
+ * For useronly, perform any required alignment tests.
187
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
188
+ * is required and fill in @h with the host address for the fast path.
132
+ */
189
+ */
133
+
190
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
134
#ifndef HOST_UTILS_H
191
+ TCGReg addrlo, TCGReg addrhi,
135
#define HOST_UTILS_H
192
+ MemOpIdx oi, bool is_ld)
136
137
@@ -XXX,XX +XXX,XX @@ void urshift(uint64_t *plow, uint64_t *phigh, int32_t shift);
138
*/
139
void ulshift(uint64_t *plow, uint64_t *phigh, int32_t shift, bool *overflow);
140
141
+/* From the GNU Multi Precision Library - longlong.h __udiv_qrnnd
142
+ * (https://gmplib.org/repo/gmp/file/tip/longlong.h)
143
+ *
144
+ * Licensed under the GPLv2/LGPLv3
145
+ */
146
+static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
147
+ uint64_t n0, uint64_t d)
148
+{
193
+{
149
+#if defined(__x86_64__)
194
+ TCGLabelQemuLdst *ldst = NULL;
150
+ uint64_t q;
195
+ MemOp opc = get_memop(oi);
151
+ asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
196
+ unsigned a_bits = get_alignment_bits(opc);
152
+ return q;
197
+ unsigned a_mask = (1 << a_bits) - 1;
153
+#elif defined(__s390x__) && !defined(__clang__)
198
+
154
+ /* Need to use a TImode type to get an even register pair for DLGR. */
199
+#ifdef CONFIG_SOFTMMU
155
+ unsigned __int128 n = (unsigned __int128)n1 << 64 | n0;
200
+ int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
156
+ asm("dlgr %0, %1" : "+r"(n) : "r"(d));
201
+ : offsetof(CPUTLBEntry, addr_write);
157
+ *r = n >> 64;
202
+ TCGType ttype = TCG_TYPE_I32;
158
+ return n;
203
+ TCGType tlbtype = TCG_TYPE_I32;
159
+#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
204
+ int trexw = 0, hrexw = 0, tlbrexw = 0;
160
+ /* From Power ISA 2.06, programming note for divdeu. */
205
+ unsigned mem_index = get_mmuidx(oi);
161
+ uint64_t q1, q2, Q, r1, r2, R;
206
+ unsigned s_bits = opc & MO_SIZE;
162
+ asm("divdeu %0,%2,%4; divdu %1,%3,%4"
207
+ unsigned s_mask = (1 << s_bits) - 1;
163
+ : "=&r"(q1), "=r"(q2)
208
+ target_ulong tlb_mask;
164
+ : "r"(n1), "r"(n0), "r"(d));
209
+
165
+ r1 = -(q1 * d); /* low part of (n1<<64) - (q1 * d) */
210
+ ldst = new_ldst_label(s);
166
+ r2 = n0 - (q2 * d);
211
+ ldst->is_ld = is_ld;
167
+ Q = q1 + q2;
212
+ ldst->oi = oi;
168
+ R = r1 + r2;
213
+ ldst->addrlo_reg = addrlo;
169
+ if (R >= d || R < r2) { /* overflow implies R > d */
214
+ ldst->addrhi_reg = addrhi;
170
+ Q += 1;
215
+
171
+ R -= d;
216
+ if (TCG_TARGET_REG_BITS == 64) {
172
+ }
217
+ if (TARGET_LONG_BITS == 64) {
173
+ *r = R;
218
+ ttype = TCG_TYPE_I64;
174
+ return Q;
219
+ trexw = P_REXW;
175
+#else
220
+ }
176
+ uint64_t d0, d1, q0, q1, r1, r0, m;
221
+ if (TCG_TYPE_PTR == TCG_TYPE_I64) {
177
+
222
+ hrexw = P_REXW;
178
+ d0 = (uint32_t)d;
223
+ if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
179
+ d1 = d >> 32;
224
+ tlbtype = TCG_TYPE_I64;
180
+
225
+ tlbrexw = P_REXW;
181
+ r1 = n1 % d1;
182
+ q1 = n1 / d1;
183
+ m = q1 * d0;
184
+ r1 = (r1 << 32) | (n0 >> 32);
185
+ if (r1 < m) {
186
+ q1 -= 1;
187
+ r1 += d;
188
+ if (r1 >= d) {
189
+ if (r1 < m) {
190
+ q1 -= 1;
191
+ r1 += d;
192
+ }
226
+ }
193
+ }
227
+ }
194
+ }
228
+ }
195
+ r1 -= m;
229
+
196
+
230
+ tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
197
+ r0 = r1 % d1;
231
+ tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
198
+ q0 = r1 / d1;
232
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
199
+ m = q0 * d0;
233
+
200
+ r0 = (r0 << 32) | (uint32_t)n0;
234
+ tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
201
+ if (r0 < m) {
235
+ TLB_MASK_TABLE_OFS(mem_index) +
202
+ q0 -= 1;
236
+ offsetof(CPUTLBDescFast, mask));
203
+ r0 += d;
237
+
204
+ if (r0 >= d) {
238
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
205
+ if (r0 < m) {
239
+ TLB_MASK_TABLE_OFS(mem_index) +
206
+ q0 -= 1;
240
+ offsetof(CPUTLBDescFast, table));
207
+ r0 += d;
241
+
208
+ }
242
+ /*
209
+ }
243
+ * If the required alignment is at least as large as the access, simply
244
+ * copy the address and mask. For lesser alignments, check that we don't
245
+ * cross pages for the complete access.
246
+ */
247
+ if (a_bits >= s_bits) {
248
+ tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
249
+ } else {
250
+ tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
251
+ addrlo, s_mask - a_mask);
210
+ }
252
+ }
211
+ r0 -= m;
253
+ tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
212
+
254
+ tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
213
+ *r = r0;
255
+
214
+ return (q1 << 32) | q0;
256
+ /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
257
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
258
+ TCG_REG_L1, TCG_REG_L0, cmp_ofs);
259
+
260
+ /*
261
+ * Prepare for both the fast path add of the tlb addend, and the slow
262
+ * path function argument setup.
263
+ */
264
+ *h = (HostAddress) {
265
+ .base = TCG_REG_L1,
266
+ .index = -1
267
+ };
268
+ tcg_out_mov(s, ttype, h->base, addrlo);
269
+
270
+ /* jne slow_path */
271
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
272
+ ldst->label_ptr[0] = s->code_ptr;
273
+ s->code_ptr += 4;
274
+
275
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
276
+ /* cmp 4(TCG_REG_L0), addrhi */
277
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
278
+
279
+ /* jne slow_path */
280
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
281
+ ldst->label_ptr[1] = s->code_ptr;
282
+ s->code_ptr += 4;
283
+ }
284
+
285
+ /* TLB Hit. */
286
+
287
+ /* add addend(TCG_REG_L0), TCG_REG_L1 */
288
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, h->base, TCG_REG_L0,
289
+ offsetof(CPUTLBEntry, addend));
290
+#else
291
+ if (a_bits) {
292
+ ldst = new_ldst_label(s);
293
+
294
+ ldst->is_ld = is_ld;
295
+ ldst->oi = oi;
296
+ ldst->addrlo_reg = addrlo;
297
+ ldst->addrhi_reg = addrhi;
298
+
299
+ tcg_out_testi(s, addrlo, a_mask);
300
+ /* jne slow_path */
301
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
302
+ ldst->label_ptr[0] = s->code_ptr;
303
+ s->code_ptr += 4;
304
+ }
305
+
306
+ *h = x86_guest_base;
307
+ h->base = addrlo;
215
+#endif
308
+#endif
309
+
310
+ return ldst;
216
+}
311
+}
217
+
312
+
218
#endif
313
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
314
HostAddress h, TCGType type, MemOp memop)
315
{
316
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
317
TCGReg addrlo, TCGReg addrhi,
318
MemOpIdx oi, TCGType data_type)
319
{
320
- MemOp opc = get_memop(oi);
321
+ TCGLabelQemuLdst *ldst;
322
HostAddress h;
323
324
-#if defined(CONFIG_SOFTMMU)
325
- tcg_insn_unit *label_ptr[2];
326
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
327
+ tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi));
328
329
- tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
330
- label_ptr, offsetof(CPUTLBEntry, addr_read));
331
-
332
- /* TLB Hit. */
333
- h.base = TCG_REG_L1;
334
- h.index = -1;
335
- h.ofs = 0;
336
- h.seg = 0;
337
- tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
338
-
339
- /* Record the current context of a load into ldst label */
340
- add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
341
- addrlo, addrhi, s->code_ptr, label_ptr);
342
-#else
343
- unsigned a_bits = get_alignment_bits(opc);
344
- if (a_bits) {
345
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
346
+ if (ldst) {
347
+ ldst->type = data_type;
348
+ ldst->datalo_reg = datalo;
349
+ ldst->datahi_reg = datahi;
350
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
351
}
352
-
353
- h = x86_guest_base;
354
- h.base = addrlo;
355
- tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
356
-#endif
357
}
358
359
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
360
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
361
TCGReg addrlo, TCGReg addrhi,
362
MemOpIdx oi, TCGType data_type)
363
{
364
- MemOp opc = get_memop(oi);
365
+ TCGLabelQemuLdst *ldst;
366
HostAddress h;
367
368
-#if defined(CONFIG_SOFTMMU)
369
- tcg_insn_unit *label_ptr[2];
370
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
371
+ tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi));
372
373
- tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
374
- label_ptr, offsetof(CPUTLBEntry, addr_write));
375
-
376
- /* TLB Hit. */
377
- h.base = TCG_REG_L1;
378
- h.index = -1;
379
- h.ofs = 0;
380
- h.seg = 0;
381
- tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
382
-
383
- /* Record the current context of a store into ldst label */
384
- add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
385
- addrlo, addrhi, s->code_ptr, label_ptr);
386
-#else
387
- unsigned a_bits = get_alignment_bits(opc);
388
- if (a_bits) {
389
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
390
+ if (ldst) {
391
+ ldst->type = data_type;
392
+ ldst->datalo_reg = datalo;
393
+ ldst->datahi_reg = datahi;
394
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
395
}
396
-
397
- h = x86_guest_base;
398
- h.base = addrlo;
399
-
400
- tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
401
-#endif
402
}
403
404
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
219
--
405
--
220
2.25.1
406
2.34.1
221
407
222
408
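udiv_qrnnd(), moved above into host-utils, divides the two-word value (n1:n0) by d, returning the 64-bit quotient and storing the remainder through *r; as with the GMP macro it derives from, the caller is expected to keep n1 < d so the quotient fits in 64 bits. A standalone cross-check with unsigned __int128, reusing values from the last test_table_unsigned row in the test-div128 patch earlier in the series:

/* (n1:n0) / d with n1 < d: quotient fits in 64 bits. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t n1 = 0x0000000000000001ULL;  /* high word of the dividend */
    uint64_t n0 = 0x23456789abcdef01ULL;  /* low word of the dividend */
    uint64_t d  = 0x123456789abcdefeULL;  /* divisor, larger than n1 */

    unsigned __int128 n = ((unsigned __int128)n1 << 64) | n0;
    uint64_t q = (uint64_t)(n / d);
    uint64_t r = (uint64_t)(n % d);

    assert(q == 0x000000000000000fULL);
    assert(r == 0x123456789abcde1fULL);
    return 0;
}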
1
Sign repetitions are perforce all identical, whether they are 1 or 0.
1
Since tcg_out_{ld,st}_helper_args, the slow path no longer requires
2
Bitwise operations preserve the relative quantity of the repetitions.
2
the address argument to be set up by the tlb load sequence. Use a
3
plain load for the addend and indexed addressing with the original
4
input address register.
3
5
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
8
---
9
tcg/optimize.c | 29 +++++++++++++++++++++++++++++
9
tcg/i386/tcg-target.c.inc | 25 ++++++++++---------------
10
1 file changed, 29 insertions(+)
10
1 file changed, 10 insertions(+), 15 deletions(-)
11
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
14
--- a/tcg/i386/tcg-target.c.inc
15
+++ b/tcg/optimize.c
15
+++ b/tcg/i386/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
17
z2 = arg_info(op->args[2])->z_mask;
17
tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
18
ctx->z_mask = z1 & z2;
18
} else {
19
19
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
20
+ /*
20
- /* The second argument is already loaded with addrlo. */
21
+ * Sign repetitions are perforce all identical, whether they are 1 or 0.
21
+ tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
22
+ * Bitwise operations preserve the relative quantity of the repetitions.
22
+ l->addrlo_reg);
23
+ */
23
tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
24
+ ctx->s_mask = arg_info(op->args[1])->s_mask
24
tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
25
+ & arg_info(op->args[2])->s_mask;
25
(uintptr_t)l->raddr);
26
+
26
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
27
/*
27
tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
28
* Known-zeros does not imply known-ones. Therefore unless
28
} else {
29
* arg2 is constant, we can't infer affected bits from it.
29
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
30
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
30
- /* The second argument is already loaded with addrlo. */
31
+ tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
32
+ l->addrlo_reg);
33
tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
34
tcg_target_call_iarg_regs[2], l->datalo_reg);
35
tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
36
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
37
tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
38
TCG_REG_L1, TCG_REG_L0, cmp_ofs);
39
40
- /*
41
- * Prepare for both the fast path add of the tlb addend, and the slow
42
- * path function argument setup.
43
- */
44
- *h = (HostAddress) {
45
- .base = TCG_REG_L1,
46
- .index = -1
47
- };
48
- tcg_out_mov(s, ttype, h->base, addrlo);
49
-
50
/* jne slow_path */
51
tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
52
ldst->label_ptr[0] = s->code_ptr;
53
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
31
}
54
}
32
ctx->z_mask = z1;
55
33
56
/* TLB Hit. */
34
+ ctx->s_mask = arg_info(op->args[1])->s_mask
57
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_L0, TCG_REG_L0,
35
+ & arg_info(op->args[2])->s_mask;
58
+ offsetof(CPUTLBEntry, addend));
36
return fold_masks(ctx, op);
59
37
}
60
- /* add addend(TCG_REG_L0), TCG_REG_L1 */
38
61
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, h->base, TCG_REG_L0,
39
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
62
- offsetof(CPUTLBEntry, addend));
40
fold_xi_to_not(ctx, op, 0)) {
63
+ *h = (HostAddress) {
41
return true;
64
+ .base = addrlo,
42
}
65
+ .index = TCG_REG_L0,
43
+
66
+ };
44
+ ctx->s_mask = arg_info(op->args[1])->s_mask
67
#else
45
+ & arg_info(op->args[2])->s_mask;
68
if (a_bits) {
46
return false;
69
ldst = new_ldst_label(s);
47
}
48
49
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
50
51
ctx->z_mask = arg_info(op->args[3])->z_mask
52
| arg_info(op->args[4])->z_mask;
53
+ ctx->s_mask = arg_info(op->args[3])->s_mask
54
+ & arg_info(op->args[4])->s_mask;
55
56
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
57
uint64_t tv = arg_info(op->args[3])->val;
58
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
59
fold_xi_to_not(ctx, op, -1)) {
60
return true;
61
}
62
+
63
+ ctx->s_mask = arg_info(op->args[1])->s_mask
64
+ & arg_info(op->args[2])->s_mask;
65
return false;
66
}
67
68
@@ -XXX,XX +XXX,XX @@ static bool fold_nor(OptContext *ctx, TCGOp *op)
69
fold_xi_to_not(ctx, op, 0)) {
70
return true;
71
}
72
+
73
+ ctx->s_mask = arg_info(op->args[1])->s_mask
74
+ & arg_info(op->args[2])->s_mask;
75
return false;
76
}
77
78
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
79
return true;
80
}
81
82
+ ctx->s_mask = arg_info(op->args[1])->s_mask;
83
+
84
/* Because of fold_to_not, we want to always return true, via finish. */
85
finish_folding(ctx, op);
86
return true;
87
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
88
89
ctx->z_mask = arg_info(op->args[1])->z_mask
90
| arg_info(op->args[2])->z_mask;
91
+ ctx->s_mask = arg_info(op->args[1])->s_mask
92
+ & arg_info(op->args[2])->s_mask;
93
return fold_masks(ctx, op);
94
}
95
96
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
97
fold_ix_to_not(ctx, op, 0)) {
98
return true;
99
}
100
+
101
+ ctx->s_mask = arg_info(op->args[1])->s_mask
102
+ & arg_info(op->args[2])->s_mask;
103
return false;
104
}
105
106
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
107
108
ctx->z_mask = arg_info(op->args[1])->z_mask
109
| arg_info(op->args[2])->z_mask;
110
+ ctx->s_mask = arg_info(op->args[1])->s_mask
111
+ & arg_info(op->args[2])->s_mask;
112
return fold_masks(ctx, op);
113
}
114
115
--
70
--
116
2.25.1
71
2.34.1
117
72
118
73
1
Split out the conditional conversion from a more complex logical
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
2
operation to a simple NOT. Create a couple more helpers to make
2
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
3
this easy for the outer-most logical operations.
3
into one function that returns HostAddress and TCGLabelQemuLdst structures.
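The NOT conversions above rest on simple identities. Assuming the usual TCG semantics (eqv r,a,b = ~(a ^ b), andc r,a,b = a & ~b, orc r,a,b = a | ~b), they can be spot-checked with a standalone sketch in plain C operators rather than TCG opcodes:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t x = 0x1234abcd5678ef01ull;   /* arbitrary sample value */

        assert((x ^ ~0ull) == ~x);            /* xor  x, -1 -> not x */
        assert(~(x & ~0ull) == ~x);           /* nand x, -1 -> not x */
        assert(~(x | 0) == ~x);               /* nor  x, 0  -> not x */
        assert(~(x ^ 0) == ~x);               /* eqv  x, 0  -> not x */
        assert((~0ull & ~x) == ~x);           /* andc -1, x -> not x */
        assert((0 | ~x) == ~x);               /* orc  0, x  -> not x */
        return 0;
    }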
4
4
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 158 +++++++++++++++++++++++++++----------------------
8
tcg/aarch64/tcg-target.c.inc | 313 +++++++++++++++--------------------
9
1 file changed, 86 insertions(+), 72 deletions(-)
9
1 file changed, 133 insertions(+), 180 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/optimize.c
14
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
16
return false;
16
tcg_out_goto(s, lb->raddr);
17
return true;
17
}
18
}
19
-
20
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
21
- TCGType ext, TCGReg data_reg, TCGReg addr_reg,
22
- tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
23
-{
24
- TCGLabelQemuLdst *label = new_ldst_label(s);
25
-
26
- label->is_ld = is_ld;
27
- label->oi = oi;
28
- label->type = ext;
29
- label->datalo_reg = data_reg;
30
- label->addrlo_reg = addr_reg;
31
- label->raddr = tcg_splitwx_to_rx(raddr);
32
- label->label_ptr[0] = label_ptr;
33
-}
34
-
35
-/* We expect to use a 7-bit scaled negative offset from ENV. */
36
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
37
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
38
-
39
-/* These offsets are built into the LDP below. */
40
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
41
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
42
-
43
-/* Load and compare a TLB entry, emitting the conditional jump to the
44
- slow path for the failure case, which will be patched later when finalizing
45
- the slow path. Generated code returns the host addend in X1,
46
- clobbers X0,X2,X3,TMP. */
47
-static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
48
- tcg_insn_unit **label_ptr, int mem_index,
49
- bool is_read)
50
-{
51
- unsigned a_bits = get_alignment_bits(opc);
52
- unsigned s_bits = opc & MO_SIZE;
53
- unsigned a_mask = (1u << a_bits) - 1;
54
- unsigned s_mask = (1u << s_bits) - 1;
55
- TCGReg x3;
56
- TCGType mask_type;
57
- uint64_t compare_mask;
58
-
59
- mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
60
- ? TCG_TYPE_I64 : TCG_TYPE_I32);
61
-
62
- /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
63
- tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
64
- TLB_MASK_TABLE_OFS(mem_index), 1, 0);
65
-
66
- /* Extract the TLB index from the address into X0. */
67
- tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
68
- TCG_REG_X0, TCG_REG_X0, addr_reg,
69
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
70
-
71
- /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
72
- tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
73
-
74
- /* Load the tlb comparator into X0, and the fast path addend into X1. */
75
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1, is_read
76
- ? offsetof(CPUTLBEntry, addr_read)
77
- : offsetof(CPUTLBEntry, addr_write));
78
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
79
- offsetof(CPUTLBEntry, addend));
80
-
81
- /* For aligned accesses, we check the first byte and include the alignment
82
- bits within the address. For unaligned access, we check that we don't
83
- cross pages using the address of the last byte of the access. */
84
- if (a_bits >= s_bits) {
85
- x3 = addr_reg;
86
- } else {
87
- tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
88
- TCG_REG_X3, addr_reg, s_mask - a_mask);
89
- x3 = TCG_REG_X3;
90
- }
91
- compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
92
-
93
- /* Store the page mask part of the address into X3. */
94
- tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
95
- TCG_REG_X3, x3, compare_mask);
96
-
97
- /* Perform the address comparison. */
98
- tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
99
-
100
- /* If not equal, we jump to the slow path. */
101
- *label_ptr = s->code_ptr;
102
- tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
103
-}
104
-
105
#else
106
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
107
- unsigned a_bits)
108
-{
109
- unsigned a_mask = (1 << a_bits) - 1;
110
- TCGLabelQemuLdst *label = new_ldst_label(s);
111
-
112
- label->is_ld = is_ld;
113
- label->addrlo_reg = addr_reg;
114
-
115
- /* tst addr, #mask */
116
- tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
117
-
118
- label->label_ptr[0] = s->code_ptr;
119
-
120
- /* b.ne slow_path */
121
- tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
122
-
123
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
124
-}
125
-
126
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
127
{
128
if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
129
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
130
}
131
#endif /* CONFIG_SOFTMMU */
18
132
19
+/*
133
+/*
20
+ * Convert @op to NOT, if NOT is supported by the host.
134
+ * For softmmu, perform the TLB load and compare.
21
+ * Return true f the conversion is successful, which will still
135
+ * For useronly, perform any required alignment tests.
22
+ * indicate that the processing is complete.
136
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
137
+ * is required and fill in @h with the host address for the fast path.
23
+ */
138
+ */
24
+static bool fold_not(OptContext *ctx, TCGOp *op);
139
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
25
+static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
140
+ TCGReg addr_reg, MemOpIdx oi,
141
+ bool is_ld)
26
+{
142
+{
27
+ TCGOpcode not_op;
143
+ TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
28
+ bool have_not;
144
+ TCGLabelQemuLdst *ldst = NULL;
29
+
145
+ MemOp opc = get_memop(oi);
30
+ switch (ctx->type) {
146
+ unsigned a_bits = get_alignment_bits(opc);
31
+ case TCG_TYPE_I32:
147
+ unsigned a_mask = (1u << a_bits) - 1;
32
+ not_op = INDEX_op_not_i32;
148
+
33
+ have_not = TCG_TARGET_HAS_not_i32;
149
+#ifdef CONFIG_SOFTMMU
34
+ break;
150
+ unsigned s_bits = opc & MO_SIZE;
35
+ case TCG_TYPE_I64:
151
+ unsigned s_mask = (1u << s_bits) - 1;
36
+ not_op = INDEX_op_not_i64;
152
+ unsigned mem_index = get_mmuidx(oi);
37
+ have_not = TCG_TARGET_HAS_not_i64;
153
+ TCGReg x3;
38
+ break;
154
+ TCGType mask_type;
39
+ case TCG_TYPE_V64:
155
+ uint64_t compare_mask;
40
+ case TCG_TYPE_V128:
156
+
41
+ case TCG_TYPE_V256:
157
+ ldst = new_ldst_label(s);
42
+ not_op = INDEX_op_not_vec;
158
+ ldst->is_ld = is_ld;
43
+ have_not = TCG_TARGET_HAS_not_vec;
159
+ ldst->oi = oi;
44
+ break;
160
+ ldst->addrlo_reg = addr_reg;
45
+ default:
161
+
46
+ g_assert_not_reached();
162
+ mask_type = (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32
163
+ ? TCG_TYPE_I64 : TCG_TYPE_I32);
164
+
165
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {x0,x1}. */
166
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
167
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -512);
168
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
169
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 8);
170
+ tcg_out_insn(s, 3314, LDP, TCG_REG_X0, TCG_REG_X1, TCG_AREG0,
171
+ TLB_MASK_TABLE_OFS(mem_index), 1, 0);
172
+
173
+ /* Extract the TLB index from the address into X0. */
174
+ tcg_out_insn(s, 3502S, AND_LSR, mask_type == TCG_TYPE_I64,
175
+ TCG_REG_X0, TCG_REG_X0, addr_reg,
176
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
177
+
178
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address into X1. */
179
+ tcg_out_insn(s, 3502, ADD, 1, TCG_REG_X1, TCG_REG_X1, TCG_REG_X0);
180
+
181
+ /* Load the tlb comparator into X0, and the fast path addend into X1. */
182
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_X0, TCG_REG_X1,
183
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
184
+ : offsetof(CPUTLBEntry, addr_write));
185
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_X1, TCG_REG_X1,
186
+ offsetof(CPUTLBEntry, addend));
187
+
188
+ /*
189
+ * For aligned accesses, we check the first byte and include the alignment
190
+ * bits within the address. For unaligned access, we check that we don't
191
+ * cross pages using the address of the last byte of the access.
192
+ */
193
+ if (a_bits >= s_bits) {
194
+ x3 = addr_reg;
195
+ } else {
196
+ tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
197
+ TCG_REG_X3, addr_reg, s_mask - a_mask);
198
+ x3 = TCG_REG_X3;
47
+ }
199
+ }
48
+ if (have_not) {
200
+ compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
49
+ op->opc = not_op;
201
+
50
+ op->args[1] = op->args[idx];
202
+ /* Store the page mask part of the address into X3. */
51
+ return fold_not(ctx, op);
203
+ tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
204
+ TCG_REG_X3, x3, compare_mask);
205
+
206
+ /* Perform the address comparison. */
207
+ tcg_out_cmp(s, TARGET_LONG_BITS == 64, TCG_REG_X0, TCG_REG_X3, 0);
208
+
209
+ /* If not equal, we jump to the slow path. */
210
+ ldst->label_ptr[0] = s->code_ptr;
211
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
212
+
213
+ *h = (HostAddress){
214
+ .base = TCG_REG_X1,
215
+ .index = addr_reg,
216
+ .index_ext = addr_type
217
+ };
218
+#else
219
+ if (a_mask) {
220
+ ldst = new_ldst_label(s);
221
+
222
+ ldst->is_ld = is_ld;
223
+ ldst->oi = oi;
224
+ ldst->addrlo_reg = addr_reg;
225
+
226
+ /* tst addr, #mask */
227
+ tcg_out_logicali(s, I3404_ANDSI, 0, TCG_REG_XZR, addr_reg, a_mask);
228
+
229
+ /* b.ne slow_path */
230
+ ldst->label_ptr[0] = s->code_ptr;
231
+ tcg_out_insn(s, 3202, B_C, TCG_COND_NE, 0);
52
+ }
232
+ }
53
+ return false;
233
+
234
+ if (USE_GUEST_BASE) {
235
+ *h = (HostAddress){
236
+ .base = TCG_REG_GUEST_BASE,
237
+ .index = addr_reg,
238
+ .index_ext = addr_type
239
+ };
240
+ } else {
241
+ *h = (HostAddress){
242
+ .base = addr_reg,
243
+ .index = TCG_REG_XZR,
244
+ .index_ext = TCG_TYPE_I64
245
+ };
246
+ }
247
+#endif
248
+
249
+ return ldst;
54
+}
250
+}
55
+
251
+
56
+/* If the binary operation has first argument @i, fold to NOT. */
252
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp memop, TCGType ext,
57
+static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
253
TCGReg data_r, HostAddress h)
58
+{
59
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
60
+ return fold_to_not(ctx, op, 2);
61
+ }
62
+ return false;
63
+}
64
+
65
/* If the binary operation has second argument @i, fold to @i. */
66
static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
67
{
254
{
68
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
255
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp memop,
69
return false;
256
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
257
MemOpIdx oi, TCGType data_type)
258
{
259
- MemOp memop = get_memop(oi);
260
- TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
261
+ TCGLabelQemuLdst *ldst;
262
HostAddress h;
263
264
- /* Byte swapping is left to middle-end expansion. */
265
- tcg_debug_assert((memop & MO_BSWAP) == 0);
266
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
267
+ tcg_out_qemu_ld_direct(s, get_memop(oi), data_type, data_reg, h);
268
269
-#ifdef CONFIG_SOFTMMU
270
- tcg_insn_unit *label_ptr;
271
-
272
- tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 1);
273
-
274
- h = (HostAddress){
275
- .base = TCG_REG_X1,
276
- .index = addr_reg,
277
- .index_ext = addr_type
278
- };
279
- tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
280
-
281
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
282
- s->code_ptr, label_ptr);
283
-#else /* !CONFIG_SOFTMMU */
284
- unsigned a_bits = get_alignment_bits(memop);
285
- if (a_bits) {
286
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
287
+ if (ldst) {
288
+ ldst->type = data_type;
289
+ ldst->datalo_reg = data_reg;
290
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
291
}
292
- if (USE_GUEST_BASE) {
293
- h = (HostAddress){
294
- .base = TCG_REG_GUEST_BASE,
295
- .index = addr_reg,
296
- .index_ext = addr_type
297
- };
298
- } else {
299
- h = (HostAddress){
300
- .base = addr_reg,
301
- .index = TCG_REG_XZR,
302
- .index_ext = TCG_TYPE_I64
303
- };
304
- }
305
- tcg_out_qemu_ld_direct(s, memop, data_type, data_reg, h);
306
-#endif /* CONFIG_SOFTMMU */
70
}
307
}
71
308
72
+/* If the binary operation has second argument @i, fold to NOT. */
309
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
73
+static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
310
MemOpIdx oi, TCGType data_type)
74
+{
75
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
76
+ return fold_to_not(ctx, op, 1);
77
+ }
78
+ return false;
79
+}
80
+
81
/* If the binary operation has both arguments equal, fold to @i. */
82
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
83
{
311
{
84
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
312
- MemOp memop = get_memop(oi);
85
static bool fold_andc(OptContext *ctx, TCGOp *op)
313
- TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
86
{
314
+ TCGLabelQemuLdst *ldst;
87
if (fold_const2(ctx, op) ||
315
HostAddress h;
88
- fold_xx_to_i(ctx, op, 0)) {
316
89
+ fold_xx_to_i(ctx, op, 0) ||
317
- /* Byte swapping is left to middle-end expansion. */
90
+ fold_ix_to_not(ctx, op, -1)) {
318
- tcg_debug_assert((memop & MO_BSWAP) == 0);
91
return true;
319
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
320
+ tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
321
322
-#ifdef CONFIG_SOFTMMU
323
- tcg_insn_unit *label_ptr;
324
-
325
- tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 0);
326
-
327
- h = (HostAddress){
328
- .base = TCG_REG_X1,
329
- .index = addr_reg,
330
- .index_ext = addr_type
331
- };
332
- tcg_out_qemu_st_direct(s, memop, data_reg, h);
333
-
334
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
335
- s->code_ptr, label_ptr);
336
-#else /* !CONFIG_SOFTMMU */
337
- unsigned a_bits = get_alignment_bits(memop);
338
- if (a_bits) {
339
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
340
+ if (ldst) {
341
+ ldst->type = data_type;
342
+ ldst->datalo_reg = data_reg;
343
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
92
}
344
}
93
return false;
345
- if (USE_GUEST_BASE) {
94
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
346
- h = (HostAddress){
95
347
- .base = TCG_REG_GUEST_BASE,
96
static bool fold_eqv(OptContext *ctx, TCGOp *op)
348
- .index = addr_reg,
97
{
349
- .index_ext = addr_type
98
- return fold_const2(ctx, op);
350
- };
99
+ if (fold_const2(ctx, op) ||
351
- } else {
100
+ fold_xi_to_not(ctx, op, 0)) {
352
- h = (HostAddress){
101
+ return true;
353
- .base = addr_reg,
102
+ }
354
- .index = TCG_REG_XZR,
103
+ return false;
355
- .index_ext = TCG_TYPE_I64
356
- };
357
- }
358
- tcg_out_qemu_st_direct(s, memop, data_reg, h);
359
-#endif /* CONFIG_SOFTMMU */
104
}
360
}
105
361
106
static bool fold_extract(OptContext *ctx, TCGOp *op)
362
static const tcg_insn_unit *tb_ret_addr;
107
@@ -XXX,XX +XXX,XX @@ static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
108
109
static bool fold_nand(OptContext *ctx, TCGOp *op)
110
{
111
- return fold_const2(ctx, op);
112
+ if (fold_const2(ctx, op) ||
113
+ fold_xi_to_not(ctx, op, -1)) {
114
+ return true;
115
+ }
116
+ return false;
117
}
118
119
static bool fold_neg(OptContext *ctx, TCGOp *op)
120
@@ -XXX,XX +XXX,XX @@ static bool fold_neg(OptContext *ctx, TCGOp *op)
121
122
static bool fold_nor(OptContext *ctx, TCGOp *op)
123
{
124
- return fold_const2(ctx, op);
125
+ if (fold_const2(ctx, op) ||
126
+ fold_xi_to_not(ctx, op, 0)) {
127
+ return true;
128
+ }
129
+ return false;
130
}
131
132
static bool fold_not(OptContext *ctx, TCGOp *op)
133
{
134
- return fold_const1(ctx, op);
135
+ if (fold_const1(ctx, op)) {
136
+ return true;
137
+ }
138
+
139
+ /* Because of fold_to_not, we want to always return true, via finish. */
140
+ finish_folding(ctx, op);
141
+ return true;
142
}
143
144
static bool fold_or(OptContext *ctx, TCGOp *op)
145
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
146
147
static bool fold_orc(OptContext *ctx, TCGOp *op)
148
{
149
- return fold_const2(ctx, op);
150
+ if (fold_const2(ctx, op) ||
151
+ fold_ix_to_not(ctx, op, 0)) {
152
+ return true;
153
+ }
154
+ return false;
155
}
156
157
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
158
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
159
static bool fold_xor(OptContext *ctx, TCGOp *op)
160
{
161
if (fold_const2(ctx, op) ||
162
- fold_xx_to_i(ctx, op, 0)) {
163
+ fold_xx_to_i(ctx, op, 0) ||
164
+ fold_xi_to_not(ctx, op, -1)) {
165
return true;
166
}
167
return false;
168
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
169
}
170
}
171
break;
172
- CASE_OP_32_64_VEC(xor):
173
- CASE_OP_32_64(nand):
174
- if (!arg_is_const(op->args[1])
175
- && arg_is_const(op->args[2])
176
- && arg_info(op->args[2])->val == -1) {
177
- i = 1;
178
- goto try_not;
179
- }
180
- break;
181
- CASE_OP_32_64(nor):
182
- if (!arg_is_const(op->args[1])
183
- && arg_is_const(op->args[2])
184
- && arg_info(op->args[2])->val == 0) {
185
- i = 1;
186
- goto try_not;
187
- }
188
- break;
189
- CASE_OP_32_64_VEC(andc):
190
- if (!arg_is_const(op->args[2])
191
- && arg_is_const(op->args[1])
192
- && arg_info(op->args[1])->val == -1) {
193
- i = 2;
194
- goto try_not;
195
- }
196
- break;
197
- CASE_OP_32_64_VEC(orc):
198
- CASE_OP_32_64(eqv):
199
- if (!arg_is_const(op->args[2])
200
- && arg_is_const(op->args[1])
201
- && arg_info(op->args[1])->val == 0) {
202
- i = 2;
203
- goto try_not;
204
- }
205
- break;
206
- try_not:
207
- {
208
- TCGOpcode not_op;
209
- bool have_not;
210
-
211
- switch (ctx.type) {
212
- case TCG_TYPE_I32:
213
- not_op = INDEX_op_not_i32;
214
- have_not = TCG_TARGET_HAS_not_i32;
215
- break;
216
- case TCG_TYPE_I64:
217
- not_op = INDEX_op_not_i64;
218
- have_not = TCG_TARGET_HAS_not_i64;
219
- break;
220
- case TCG_TYPE_V64:
221
- case TCG_TYPE_V128:
222
- case TCG_TYPE_V256:
223
- not_op = INDEX_op_not_vec;
224
- have_not = TCG_TARGET_HAS_not_vec;
225
- break;
226
- default:
227
- g_assert_not_reached();
228
- }
229
- if (!have_not) {
230
- break;
231
- }
232
- op->opc = not_op;
233
- reset_temp(op->args[0]);
234
- op->args[1] = op->args[i];
235
- continue;
236
- }
237
default:
238
break;
239
}
240
--
363
--
241
2.25.1
364
2.34.1
242
365
243
366
1
Split out a whole bunch of placeholder functions, which are
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, and some code that lived
2
currently identical. That won't last as more code gets moved.
2
in both tcg_out_qemu_ld and tcg_out_qemu_st into one function that
3
3
returns HostAddress and TCGLabelQemuLdst structures.
4
Use CASE_32_64_VEC for some logical operators that previously
5
missed the addition of vectors.
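The backend conversions in this series all follow the same shape: the merged function fills in the host address for the fast path and returns a TCGLabelQemuLdst only when a slow path is needed, which the caller completes after emitting the fast path. A self-contained miniature of that calling pattern, using hypothetical names (Addr, Slow, prepare) rather than QEMU's types:

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>

    typedef struct { int base, index; } Addr;                       /* cf. HostAddress */
    typedef struct { bool is_ld; int data_reg; int raddr; } Slow;   /* cf. TCGLabelQemuLdst */

    static Slow slow;   /* one outstanding slow-path descriptor */

    /* Fill *a for the fast path; return a descriptor iff a slow path exists. */
    static Slow *prepare(Addr *a, int addr_reg, bool need_slow, bool is_ld)
    {
        a->base = addr_reg;
        a->index = -1;
        if (!need_slow) {
            return NULL;
        }
        slow.is_ld = is_ld;
        return &slow;
    }

    int main(void)
    {
        Addr a;
        Slow *sp = prepare(&a, 7, true, true);

        /* ... the fast path would be emitted here, using a ... */

        if (sp) {                      /* complete the descriptor afterwards */
            sp->data_reg = 3;
            sp->raddr = 42;            /* stand-in for "code_ptr after fast path" */
        }
        assert(a.base == 7 && sp && sp->is_ld && sp->data_reg == 3);
        return 0;
    }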
6
4
7
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
---
7
---
11
tcg/optimize.c | 271 +++++++++++++++++++++++++++++++++++++++----------
8
tcg/arm/tcg-target.c.inc | 351 ++++++++++++++++++---------------------
12
1 file changed, 219 insertions(+), 52 deletions(-)
9
1 file changed, 159 insertions(+), 192 deletions(-)
13
10
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
15
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
13
--- a/tcg/arm/tcg-target.c.inc
17
+++ b/tcg/optimize.c
14
+++ b/tcg/arm/tcg-target.c.inc
18
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
19
}
16
}
20
}
17
}
21
18
22
+/*
19
-#define TLB_SHIFT    (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
23
+ * The fold_* functions return true when processing is complete,
20
-
24
+ * usually by folding the operation to a constant or to a copy,
21
-/* We expect to use an 9-bit sign-magnitude negative offset from ENV. */
25
+ * and calling tcg_opt_gen_{mov,movi}. They may do other things,
22
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
26
+ * like collect information about the value produced, for use in
23
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
27
+ * optimizing a subsequent operation.
24
-
28
+ *
25
-/* These offsets are built into the LDRD below. */
29
+ * These first fold_* functions are all helpers, used by other
26
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
30
+ * folders for more specific operations.
27
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
31
+ */
28
-
32
+
29
-/* Load and compare a TLB entry, leaving the flags set. Returns the register
33
+static bool fold_const1(OptContext *ctx, TCGOp *op)
30
- containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
34
+{
31
-
35
+ if (arg_is_const(op->args[1])) {
32
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
36
+ uint64_t t;
33
- MemOp opc, int mem_index, bool is_load)
37
+
34
-{
38
+ t = arg_info(op->args[1])->val;
35
- int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
39
+ t = do_constant_folding(op->opc, t, 0);
36
- : offsetof(CPUTLBEntry, addr_write));
40
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
37
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
41
+ }
38
- unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
42
+ return false;
39
- unsigned a_mask = (1 << get_alignment_bits(opc)) - 1;
43
+}
40
- TCGReg t_addr;
44
+
41
-
45
+static bool fold_const2(OptContext *ctx, TCGOp *op)
42
- /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
46
+{
43
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
47
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
44
-
48
+ uint64_t t1 = arg_info(op->args[1])->val;
45
- /* Extract the tlb index from the address into R0. */
49
+ uint64_t t2 = arg_info(op->args[2])->val;
46
- tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
50
+
47
- SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
51
+ t1 = do_constant_folding(op->opc, t1, t2);
48
-
52
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
49
- /*
53
+ }
50
- * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
54
+ return false;
51
- * Load the tlb comparator into R2/R3 and the fast path addend into R1.
55
+}
52
- */
56
+
53
- if (cmp_off == 0) {
57
+/*
54
- if (TARGET_LONG_BITS == 64) {
58
+ * These outermost fold_<op> functions are sorted alphabetically.
55
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
59
+ */
56
- } else {
60
+
57
- tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
61
+static bool fold_add(OptContext *ctx, TCGOp *op)
58
- }
62
+{
59
- } else {
63
+ return fold_const2(ctx, op);
60
- tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
64
+}
61
- TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
65
+
62
- if (TARGET_LONG_BITS == 64) {
66
+static bool fold_and(OptContext *ctx, TCGOp *op)
63
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
67
+{
64
- } else {
68
+ return fold_const2(ctx, op);
65
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
69
+}
66
- }
70
+
67
- }
71
+static bool fold_andc(OptContext *ctx, TCGOp *op)
68
-
72
+{
69
- /* Load the tlb addend. */
73
+ return fold_const2(ctx, op);
70
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
74
+}
71
- offsetof(CPUTLBEntry, addend));
75
+
72
-
76
static bool fold_call(OptContext *ctx, TCGOp *op)
73
- /*
74
- * Check alignment, check comparators.
75
- * Do this in 2-4 insns. Use MOVW for v7, if possible,
76
- * to reduce the number of sequential conditional instructions.
77
- * Almost all guests have at least 4k pages, which means that we need
78
- * to clear at least 9 bits even for an 8-byte memory, which means it
79
- * isn't worth checking for an immediate operand for BIC.
80
- *
81
- * For unaligned accesses, test the page of the last unit of alignment.
82
- * This leaves the least significant alignment bits unchanged, and of
83
- * course must be zero.
84
- */
85
- t_addr = addrlo;
86
- if (a_mask < s_mask) {
87
- t_addr = TCG_REG_R0;
88
- tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
89
- addrlo, s_mask - a_mask);
90
- }
91
- if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
92
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
93
- tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
94
- t_addr, TCG_REG_TMP, 0);
95
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
96
- } else {
97
- if (a_mask) {
98
- tcg_debug_assert(a_mask <= 0xff);
99
- tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
100
- }
101
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
102
- SHIFT_IMM_LSR(TARGET_PAGE_BITS));
103
- tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
104
- 0, TCG_REG_R2, TCG_REG_TMP,
105
- SHIFT_IMM_LSL(TARGET_PAGE_BITS));
106
- }
107
-
108
- if (TARGET_LONG_BITS == 64) {
109
- tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
110
- }
111
-
112
- return TCG_REG_R1;
113
-}
114
-
115
-/* Record the context of a call to the out of line helper code for the slow
116
- path for a load or store, so that we can later generate the correct
117
- helper code. */
118
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
119
- MemOpIdx oi, TCGType type,
120
- TCGReg datalo, TCGReg datahi,
121
- TCGReg addrlo, TCGReg addrhi,
122
- tcg_insn_unit *raddr,
123
- tcg_insn_unit *label_ptr)
124
-{
125
- TCGLabelQemuLdst *label = new_ldst_label(s);
126
-
127
- label->is_ld = is_ld;
128
- label->oi = oi;
129
- label->type = type;
130
- label->datalo_reg = datalo;
131
- label->datahi_reg = datahi;
132
- label->addrlo_reg = addrlo;
133
- label->addrhi_reg = addrhi;
134
- label->raddr = tcg_splitwx_to_rx(raddr);
135
- label->label_ptr[0] = label_ptr;
136
-}
137
-
138
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
77
{
139
{
78
TCGContext *s = ctx->tcg;
140
TCGReg argreg;
79
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
141
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
80
return true;
142
return true;
81
}
143
}
82
144
#else
83
+static bool fold_ctpop(OptContext *ctx, TCGOp *op)
145
-
146
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
147
- TCGReg addrhi, unsigned a_bits)
148
-{
149
- unsigned a_mask = (1 << a_bits) - 1;
150
- TCGLabelQemuLdst *label = new_ldst_label(s);
151
-
152
- label->is_ld = is_ld;
153
- label->addrlo_reg = addrlo;
154
- label->addrhi_reg = addrhi;
155
-
156
- /* We are expecting a_bits to max out at 7, and can easily support 8. */
157
- tcg_debug_assert(a_mask <= 0xff);
158
- /* tst addr, #mask */
159
- tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
160
-
161
- /* blne slow_path */
162
- label->label_ptr[0] = s->code_ptr;
163
- tcg_out_bl_imm(s, COND_NE, 0);
164
-
165
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
166
-}
167
-
168
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
169
{
170
if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
171
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
172
}
173
#endif /* SOFTMMU */
174
175
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
176
+ TCGReg addrlo, TCGReg addrhi,
177
+ MemOpIdx oi, bool is_ld)
84
+{
178
+{
85
+ return fold_const1(ctx, op);
179
+ TCGLabelQemuLdst *ldst = NULL;
180
+ MemOp opc = get_memop(oi);
181
+ MemOp a_bits = get_alignment_bits(opc);
182
+ unsigned a_mask = (1 << a_bits) - 1;
183
+
184
+#ifdef CONFIG_SOFTMMU
185
+ int mem_index = get_mmuidx(oi);
186
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
187
+ : offsetof(CPUTLBEntry, addr_write);
188
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
189
+ unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
190
+ TCGReg t_addr;
191
+
192
+ ldst = new_ldst_label(s);
193
+ ldst->is_ld = is_ld;
194
+ ldst->oi = oi;
195
+ ldst->addrlo_reg = addrlo;
196
+ ldst->addrhi_reg = addrhi;
197
+
198
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
199
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
200
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
201
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
202
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
203
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
204
+
205
+ /* Extract the tlb index from the address into R0. */
206
+ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
207
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
208
+
209
+ /*
210
+ * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
211
+ * Load the tlb comparator into R2/R3 and the fast path addend into R1.
212
+ */
213
+ if (cmp_off == 0) {
214
+ if (TARGET_LONG_BITS == 64) {
215
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
216
+ } else {
217
+ tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
218
+ }
219
+ } else {
220
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
221
+ TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
222
+ if (TARGET_LONG_BITS == 64) {
223
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
224
+ } else {
225
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
226
+ }
227
+ }
228
+
229
+ /* Load the tlb addend. */
230
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
231
+ offsetof(CPUTLBEntry, addend));
232
+
233
+ /*
234
+ * Check alignment, check comparators.
235
+ * Do this in 2-4 insns. Use MOVW for v7, if possible,
236
+ * to reduce the number of sequential conditional instructions.
237
+ * Almost all guests have at least 4k pages, which means that we need
238
+ * to clear at least 9 bits even for an 8-byte memory, which means it
239
+ * isn't worth checking for an immediate operand for BIC.
240
+ *
241
+ * For unaligned accesses, test the page of the last unit of alignment.
242
+ * This leaves the least significant alignment bits unchanged, and of
243
+ * course must be zero.
244
+ */
245
+ t_addr = addrlo;
246
+ if (a_mask < s_mask) {
247
+ t_addr = TCG_REG_R0;
248
+ tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
249
+ addrlo, s_mask - a_mask);
250
+ }
251
+ if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
252
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
253
+ tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
254
+ t_addr, TCG_REG_TMP, 0);
255
+ tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
256
+ } else {
257
+ if (a_mask) {
258
+ tcg_debug_assert(a_mask <= 0xff);
259
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
260
+ }
261
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
262
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS));
263
+ tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
264
+ 0, TCG_REG_R2, TCG_REG_TMP,
265
+ SHIFT_IMM_LSL(TARGET_PAGE_BITS));
266
+ }
267
+
268
+ if (TARGET_LONG_BITS == 64) {
269
+ tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
270
+ }
271
+
272
+ *h = (HostAddress){
273
+ .cond = COND_AL,
274
+ .base = addrlo,
275
+ .index = TCG_REG_R1,
276
+ .index_scratch = true,
277
+ };
278
+#else
279
+ if (a_mask) {
280
+ ldst = new_ldst_label(s);
281
+ ldst->is_ld = is_ld;
282
+ ldst->oi = oi;
283
+ ldst->addrlo_reg = addrlo;
284
+ ldst->addrhi_reg = addrhi;
285
+
286
+ /* We are expecting a_bits to max out at 7 */
287
+ tcg_debug_assert(a_mask <= 0xff);
288
+ /* tst addr, #mask */
289
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
290
+ }
291
+
292
+ *h = (HostAddress){
293
+ .cond = COND_AL,
294
+ .base = addrlo,
295
+ .index = guest_base ? TCG_REG_GUEST_BASE : -1,
296
+ .index_scratch = false,
297
+ };
298
+#endif
299
+
300
+ return ldst;
86
+}
301
+}
87
+
302
+
88
+static bool fold_divide(OptContext *ctx, TCGOp *op)
303
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
89
+{
304
TCGReg datahi, HostAddress h)
90
+ return fold_const2(ctx, op);
91
+}
92
+
93
+static bool fold_eqv(OptContext *ctx, TCGOp *op)
94
+{
95
+ return fold_const2(ctx, op);
96
+}
97
+
98
+static bool fold_exts(OptContext *ctx, TCGOp *op)
99
+{
100
+ return fold_const1(ctx, op);
101
+}
102
+
103
+static bool fold_extu(OptContext *ctx, TCGOp *op)
104
+{
105
+ return fold_const1(ctx, op);
106
+}
107
+
108
static bool fold_mb(OptContext *ctx, TCGOp *op)
109
{
305
{
110
/* Eliminate duplicate and redundant fence instructions. */
306
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
111
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
307
MemOpIdx oi, TCGType data_type)
112
return true;
308
{
309
MemOp opc = get_memop(oi);
310
+ TCGLabelQemuLdst *ldst;
311
HostAddress h;
312
313
-#ifdef CONFIG_SOFTMMU
314
- h.cond = COND_AL;
315
- h.base = addrlo;
316
- h.index_scratch = true;
317
- h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
318
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
319
+ if (ldst) {
320
+ ldst->type = data_type;
321
+ ldst->datalo_reg = datalo;
322
+ ldst->datahi_reg = datahi;
323
324
- /*
325
- * This a conditional BL only to load a pointer within this opcode into
326
- * LR for the slow path. We will not be using the value for a tail call.
327
- */
328
- tcg_insn_unit *label_ptr = s->code_ptr;
329
- tcg_out_bl_imm(s, COND_NE, 0);
330
+ /*
331
+ * This is a conditional BL only to load a pointer within this
332
+ * opcode into LR for the slow path. We will not be using
333
+ * the value for a tail call.
334
+ */
335
+ ldst->label_ptr[0] = s->code_ptr;
336
+ tcg_out_bl_imm(s, COND_NE, 0);
337
338
- tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
339
-
340
- add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
341
- addrlo, addrhi, s->code_ptr, label_ptr);
342
-#else
343
- unsigned a_bits = get_alignment_bits(opc);
344
- if (a_bits) {
345
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
346
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
347
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
348
+ } else {
349
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
350
}
351
-
352
- h.cond = COND_AL;
353
- h.base = addrlo;
354
- h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
355
- h.index_scratch = false;
356
- tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
357
-#endif
113
}
358
}
114
359
115
+static bool fold_mul(OptContext *ctx, TCGOp *op)
360
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
116
+{
361
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
117
+ return fold_const2(ctx, op);
362
MemOpIdx oi, TCGType data_type)
118
+}
119
+
120
+static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
121
+{
122
+ return fold_const2(ctx, op);
123
+}
124
+
125
+static bool fold_nand(OptContext *ctx, TCGOp *op)
126
+{
127
+ return fold_const2(ctx, op);
128
+}
129
+
130
+static bool fold_neg(OptContext *ctx, TCGOp *op)
131
+{
132
+ return fold_const1(ctx, op);
133
+}
134
+
135
+static bool fold_nor(OptContext *ctx, TCGOp *op)
136
+{
137
+ return fold_const2(ctx, op);
138
+}
139
+
140
+static bool fold_not(OptContext *ctx, TCGOp *op)
141
+{
142
+ return fold_const1(ctx, op);
143
+}
144
+
145
+static bool fold_or(OptContext *ctx, TCGOp *op)
146
+{
147
+ return fold_const2(ctx, op);
148
+}
149
+
150
+static bool fold_orc(OptContext *ctx, TCGOp *op)
151
+{
152
+ return fold_const2(ctx, op);
153
+}
154
+
155
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
156
{
363
{
157
/* Opcodes that touch guest memory stop the mb optimization. */
364
MemOp opc = get_memop(oi);
158
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
365
+ TCGLabelQemuLdst *ldst;
159
return false;
366
HostAddress h;
367
368
-#ifdef CONFIG_SOFTMMU
369
- h.cond = COND_EQ;
370
- h.base = addrlo;
371
- h.index_scratch = true;
372
- h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0);
373
- tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
374
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
375
+ if (ldst) {
376
+ ldst->type = data_type;
377
+ ldst->datalo_reg = datalo;
378
+ ldst->datahi_reg = datahi;
379
380
- /* The conditional call must come last, as we're going to return here. */
381
- tcg_insn_unit *label_ptr = s->code_ptr;
382
- tcg_out_bl_imm(s, COND_NE, 0);
383
-
384
- add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
385
- addrlo, addrhi, s->code_ptr, label_ptr);
386
-#else
387
- unsigned a_bits = get_alignment_bits(opc);
388
-
389
- h.cond = COND_AL;
390
- if (a_bits) {
391
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
392
h.cond = COND_EQ;
393
- }
394
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
395
396
- h.base = addrlo;
397
- h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
398
- h.index_scratch = false;
399
- tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
400
-#endif
401
+ /* The conditional call is last, as we're going to return here. */
402
+ ldst->label_ptr[0] = s->code_ptr;
403
+ tcg_out_bl_imm(s, COND_NE, 0);
404
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
405
+ } else {
406
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
407
+ }
160
}
408
}
161
409
162
+static bool fold_remainder(OptContext *ctx, TCGOp *op)
410
static void tcg_out_epilogue(TCGContext *s);
163
+{
164
+ return fold_const2(ctx, op);
165
+}
166
+
167
+static bool fold_shift(OptContext *ctx, TCGOp *op)
168
+{
169
+ return fold_const2(ctx, op);
170
+}
171
+
172
+static bool fold_sub(OptContext *ctx, TCGOp *op)
173
+{
174
+ return fold_const2(ctx, op);
175
+}
176
+
177
+static bool fold_xor(OptContext *ctx, TCGOp *op)
178
+{
179
+ return fold_const2(ctx, op);
180
+}
181
+
182
/* Propagate constants and copies, fold constant expressions. */
183
void tcg_optimize(TCGContext *s)
184
{
185
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
186
}
187
break;
188
189
- CASE_OP_32_64(not):
190
- CASE_OP_32_64(neg):
191
- CASE_OP_32_64(ext8s):
192
- CASE_OP_32_64(ext8u):
193
- CASE_OP_32_64(ext16s):
194
- CASE_OP_32_64(ext16u):
195
- CASE_OP_32_64(ctpop):
196
- case INDEX_op_ext32s_i64:
197
- case INDEX_op_ext32u_i64:
198
- case INDEX_op_ext_i32_i64:
199
- case INDEX_op_extu_i32_i64:
200
- case INDEX_op_extrl_i64_i32:
201
- case INDEX_op_extrh_i64_i32:
202
- if (arg_is_const(op->args[1])) {
203
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
204
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
205
- continue;
206
- }
207
- break;
208
-
209
CASE_OP_32_64(bswap16):
210
CASE_OP_32_64(bswap32):
211
case INDEX_op_bswap64_i64:
212
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
213
}
214
break;
215
216
- CASE_OP_32_64(add):
217
- CASE_OP_32_64(sub):
218
- CASE_OP_32_64(mul):
219
- CASE_OP_32_64(or):
220
- CASE_OP_32_64(and):
221
- CASE_OP_32_64(xor):
222
- CASE_OP_32_64(shl):
223
- CASE_OP_32_64(shr):
224
- CASE_OP_32_64(sar):
225
- CASE_OP_32_64(rotl):
226
- CASE_OP_32_64(rotr):
227
- CASE_OP_32_64(andc):
228
- CASE_OP_32_64(orc):
229
- CASE_OP_32_64(eqv):
230
- CASE_OP_32_64(nand):
231
- CASE_OP_32_64(nor):
232
- CASE_OP_32_64(muluh):
233
- CASE_OP_32_64(mulsh):
234
- CASE_OP_32_64(div):
235
- CASE_OP_32_64(divu):
236
- CASE_OP_32_64(rem):
237
- CASE_OP_32_64(remu):
238
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
239
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
240
- arg_info(op->args[2])->val);
241
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
242
- continue;
243
- }
244
- break;
245
-
246
CASE_OP_32_64(clz):
247
CASE_OP_32_64(ctz):
248
if (arg_is_const(op->args[1])) {
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
250
}
251
break;
252
253
+ default:
254
+ break;
255
+
256
+ /* ---------------------------------------------------------- */
257
+ /* Sorted alphabetically by opcode as much as possible. */
258
+
259
+ CASE_OP_32_64_VEC(add):
260
+ done = fold_add(&ctx, op);
261
+ break;
262
+ CASE_OP_32_64_VEC(and):
263
+ done = fold_and(&ctx, op);
264
+ break;
265
+ CASE_OP_32_64_VEC(andc):
266
+ done = fold_andc(&ctx, op);
267
+ break;
268
+ CASE_OP_32_64(ctpop):
269
+ done = fold_ctpop(&ctx, op);
270
+ break;
271
+ CASE_OP_32_64(div):
272
+ CASE_OP_32_64(divu):
273
+ done = fold_divide(&ctx, op);
274
+ break;
275
+ CASE_OP_32_64(eqv):
276
+ done = fold_eqv(&ctx, op);
277
+ break;
278
+ CASE_OP_32_64(ext8s):
279
+ CASE_OP_32_64(ext16s):
280
+ case INDEX_op_ext32s_i64:
281
+ case INDEX_op_ext_i32_i64:
282
+ done = fold_exts(&ctx, op);
283
+ break;
284
+ CASE_OP_32_64(ext8u):
285
+ CASE_OP_32_64(ext16u):
286
+ case INDEX_op_ext32u_i64:
287
+ case INDEX_op_extu_i32_i64:
288
+ case INDEX_op_extrl_i64_i32:
289
+ case INDEX_op_extrh_i64_i32:
290
+ done = fold_extu(&ctx, op);
291
+ break;
292
case INDEX_op_mb:
293
done = fold_mb(&ctx, op);
294
break;
295
+ CASE_OP_32_64(mul):
296
+ done = fold_mul(&ctx, op);
297
+ break;
298
+ CASE_OP_32_64(mulsh):
299
+ CASE_OP_32_64(muluh):
300
+ done = fold_mul_highpart(&ctx, op);
301
+ break;
302
+ CASE_OP_32_64(nand):
303
+ done = fold_nand(&ctx, op);
304
+ break;
305
+ CASE_OP_32_64(neg):
306
+ done = fold_neg(&ctx, op);
307
+ break;
308
+ CASE_OP_32_64(nor):
309
+ done = fold_nor(&ctx, op);
310
+ break;
311
+ CASE_OP_32_64_VEC(not):
312
+ done = fold_not(&ctx, op);
313
+ break;
314
+ CASE_OP_32_64_VEC(or):
315
+ done = fold_or(&ctx, op);
316
+ break;
317
+ CASE_OP_32_64_VEC(orc):
318
+ done = fold_orc(&ctx, op);
319
+ break;
320
case INDEX_op_qemu_ld_i32:
321
case INDEX_op_qemu_ld_i64:
322
done = fold_qemu_ld(&ctx, op);
323
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
324
case INDEX_op_qemu_st_i64:
325
done = fold_qemu_st(&ctx, op);
326
break;
327
-
328
- default:
329
+ CASE_OP_32_64(rem):
330
+ CASE_OP_32_64(remu):
331
+ done = fold_remainder(&ctx, op);
332
+ break;
333
+ CASE_OP_32_64(rotl):
334
+ CASE_OP_32_64(rotr):
335
+ CASE_OP_32_64(sar):
336
+ CASE_OP_32_64(shl):
337
+ CASE_OP_32_64(shr):
338
+ done = fold_shift(&ctx, op);
339
+ break;
340
+ CASE_OP_32_64_VEC(sub):
341
+ done = fold_sub(&ctx, op);
342
+ break;
343
+ CASE_OP_32_64_VEC(xor):
344
+ done = fold_xor(&ctx, op);
345
break;
346
}
347
348
--
411
--
349
2.25.1
412
2.34.1
350
413
351
414
1
Pull the "op r, a, 0 => movi r, 0" optimization into a function,
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
2
and use it in the outer opcode fold functions.
2
tcg_out_zext_addr_if_32_bit, and some code that lived in both
3
tcg_out_qemu_ld and tcg_out_qemu_st into one function that returns
4
HostAddress and TCGLabelQemuLdst structures.
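The "op r, a, 0 => movi r, 0" cases being factored out are the operations that annihilate on a zero second operand (and, mul, muluh, mulsh). A standalone spot-check in plain C; the 128-bit type used for the high-part multiplies is a GCC/Clang extension:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t x = 0xdeadbeefcafef00dull;   /* arbitrary sample value */

        assert((x & 0) == 0);                 /* and    r, a, 0 -> movi r, 0 */
        assert(x * 0 == 0);                   /* mul    r, a, 0 -> movi r, 0 */
        assert((uint64_t)(((unsigned __int128)x * 0) >> 64) == 0);
                                              /* muluh  r, a, 0 -> movi r, 0 */
        assert((int64_t)(((__int128)(int64_t)x * 0) >> 64) == 0);
                                              /* mulsh  r, a, 0 -> movi r, 0 */
        return 0;
    }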
3
5
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
---
8
tcg/optimize.c | 38 ++++++++++++++++++++------------------
9
tcg/loongarch64/tcg-target.c.inc | 255 +++++++++++++------------------
9
1 file changed, 20 insertions(+), 18 deletions(-)
10
1 file changed, 105 insertions(+), 150 deletions(-)
10
11
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
--- a/tcg/loongarch64/tcg-target.c.inc
14
+++ b/tcg/optimize.c
15
+++ b/tcg/loongarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[4] = {
16
return false;
17
[MO_64] = helper_le_stq_mmu,
18
};
19
20
-/* We expect to use a 12-bit negative offset from ENV. */
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
22
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
23
-
24
static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
25
{
26
tcg_out_opc_b(s, 0);
27
return reloc_br_sd10k16(s->code_ptr - 1, target);
17
}
28
}
18
29
19
+/* If the binary operation has second argument @i, fold to @i. */
30
-/*
20
+static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
31
- * Emits common code for TLB addend lookup, that eventually loads the
32
- * addend in TCG_REG_TMP2.
33
- */
34
-static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, MemOpIdx oi,
35
- tcg_insn_unit **label_ptr, bool is_load)
36
-{
37
- MemOp opc = get_memop(oi);
38
- unsigned s_bits = opc & MO_SIZE;
39
- unsigned a_bits = get_alignment_bits(opc);
40
- tcg_target_long compare_mask;
41
- int mem_index = get_mmuidx(oi);
42
- int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
43
- int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
44
- int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
45
-
46
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
47
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
48
-
49
- tcg_out_opc_srli_d(s, TCG_REG_TMP2, addrl,
50
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
51
- tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
52
- tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
53
-
54
- /* Load the tlb comparator and the addend. */
55
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
56
- is_load ? offsetof(CPUTLBEntry, addr_read)
57
- : offsetof(CPUTLBEntry, addr_write));
58
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
59
- offsetof(CPUTLBEntry, addend));
60
-
61
- /* We don't support unaligned accesses. */
62
- if (a_bits < s_bits) {
63
- a_bits = s_bits;
64
- }
65
- /* Clear the non-page, non-alignment bits from the address. */
66
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
67
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
68
- tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addrl);
69
-
70
- /* Compare masked address with the TLB entry. */
71
- label_ptr[0] = s->code_ptr;
72
- tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
73
-
74
- /* TLB Hit - addend in TCG_REG_TMP2, ready for use. */
75
-}
76
-
77
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
78
- TCGType type,
79
- TCGReg datalo, TCGReg addrlo,
80
- void *raddr, tcg_insn_unit **label_ptr)
81
-{
82
- TCGLabelQemuLdst *label = new_ldst_label(s);
83
-
84
- label->is_ld = is_ld;
85
- label->oi = oi;
86
- label->type = type;
87
- label->datalo_reg = datalo;
88
- label->datahi_reg = 0; /* unused */
89
- label->addrlo_reg = addrlo;
90
- label->addrhi_reg = 0; /* unused */
91
- label->raddr = tcg_splitwx_to_rx(raddr);
92
- label->label_ptr[0] = label_ptr[0];
93
-}
94
-
95
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
96
{
97
MemOpIdx oi = l->oi;
98
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
99
return tcg_out_goto(s, l->raddr);
100
}
101
#else
102
-
103
-/*
104
- * Alignment helpers for user-mode emulation
105
- */
106
-
107
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
108
- unsigned a_bits)
109
-{
110
- TCGLabelQemuLdst *l = new_ldst_label(s);
111
-
112
- l->is_ld = is_ld;
113
- l->addrlo_reg = addr_reg;
114
-
115
- /*
116
- * Without micro-architecture details, we don't know which of bstrpick or
117
- * andi is faster, so use bstrpick as it's not constrained by imm field
118
- * width. (Not to say alignments >= 2^12 are going to happen any time
119
- * soon, though)
120
- */
121
- tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
122
-
123
- l->label_ptr[0] = s->code_ptr;
124
- tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
125
-
126
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
127
-}
128
-
129
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
130
{
131
/* resolve label address */
132
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
133
134
#endif /* CONFIG_SOFTMMU */
135
136
-/*
137
- * `ext32u` the address register into the temp register given,
138
- * if target is 32-bit, no-op otherwise.
139
- *
140
- * Returns the address register ready for use with TLB addend.
141
- */
142
-static TCGReg tcg_out_zext_addr_if_32_bit(TCGContext *s,
143
- TCGReg addr, TCGReg tmp)
144
-{
145
- if (TARGET_LONG_BITS == 32) {
146
- tcg_out_ext32u(s, tmp, addr);
147
- return tmp;
148
- }
149
- return addr;
150
-}
151
-
152
typedef struct {
153
TCGReg base;
154
TCGReg index;
155
} HostAddress;
156
157
+/*
158
+ * For softmmu, perform the TLB load and compare.
159
+ * For useronly, perform any required alignment tests.
160
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
161
+ * is required and fill in @h with the host address for the fast path.
162
+ */
163
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
164
+ TCGReg addr_reg, MemOpIdx oi,
165
+ bool is_ld)
21
+{
166
+{
22
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
167
+ TCGLabelQemuLdst *ldst = NULL;
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
168
+ MemOp opc = get_memop(oi);
169
+ unsigned a_bits = get_alignment_bits(opc);
170
+
171
+#ifdef CONFIG_SOFTMMU
172
+ unsigned s_bits = opc & MO_SIZE;
173
+ int mem_index = get_mmuidx(oi);
174
+ int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
175
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
176
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
177
+ tcg_target_long compare_mask;
178
+
179
+ ldst = new_ldst_label(s);
180
+ ldst->is_ld = is_ld;
181
+ ldst->oi = oi;
182
+ ldst->addrlo_reg = addr_reg;
183
+
184
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
185
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
186
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
187
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
188
+
189
+ tcg_out_opc_srli_d(s, TCG_REG_TMP2, addr_reg,
190
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
191
+ tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
192
+ tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
193
+
194
+ /* Load the tlb comparator and the addend. */
195
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
196
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
197
+ : offsetof(CPUTLBEntry, addr_write));
198
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
199
+ offsetof(CPUTLBEntry, addend));
200
+
201
+ /* We don't support unaligned accesses. */
202
+ if (a_bits < s_bits) {
203
+ a_bits = s_bits;
24
+ }
204
+ }
25
+ return false;
205
+ /* Clear the non-page, non-alignment bits from the address. */
206
+ compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
207
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
208
+ tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
209
+
210
+ /* Compare masked address with the TLB entry. */
211
+ ldst->label_ptr[0] = s->code_ptr;
212
+ tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
213
+
214
+ h->index = TCG_REG_TMP2;
215
+#else
216
+ if (a_bits) {
217
+ ldst = new_ldst_label(s);
218
+
219
+ ldst->is_ld = is_ld;
220
+ ldst->oi = oi;
221
+ ldst->addrlo_reg = addr_reg;
222
+
223
+ /*
224
+ * Without micro-architecture details, we don't know which of
225
+ * bstrpick or andi is faster, so use bstrpick as it's not
226
+ * constrained by imm field width. Not to say alignments >= 2^12
227
+ * are going to happen any time soon.
228
+ */
229
+ tcg_out_opc_bstrpick_d(s, TCG_REG_TMP1, addr_reg, 0, a_bits - 1);
230
+
231
+ ldst->label_ptr[0] = s->code_ptr;
232
+ tcg_out_opc_bne(s, TCG_REG_TMP1, TCG_REG_ZERO, 0);
233
+ }
234
+
235
+ h->index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
236
+#endif
237
+
238
+ if (TARGET_LONG_BITS == 32) {
239
+ h->base = TCG_REG_TMP0;
240
+ tcg_out_ext32u(s, h->base, addr_reg);
241
+ } else {
242
+ h->base = addr_reg;
243
+ }
244
+
245
+ return ldst;
26
+}
246
+}
27
+
247
+
28
/* If the binary operation has both arguments equal, fold to @i. */
248
static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type,
29
static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
249
TCGReg rd, HostAddress h)
30
{
250
{
31
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
251
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_indexed(TCGContext *s, MemOp opc, TCGType type,
32
static bool fold_and(OptContext *ctx, TCGOp *op)
252
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
33
{
253
MemOpIdx oi, TCGType data_type)
34
if (fold_const2(ctx, op) ||
254
{
35
+ fold_xi_to_i(ctx, op, 0) ||
255
- MemOp opc = get_memop(oi);
36
fold_xx_to_x(ctx, op)) {
256
+ TCGLabelQemuLdst *ldst;
37
return true;
257
HostAddress h;
258
259
-#ifdef CONFIG_SOFTMMU
260
- tcg_insn_unit *label_ptr[1];
261
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
262
+ tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h);
263
264
- tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
265
- h.index = TCG_REG_TMP2;
266
-#else
267
- unsigned a_bits = get_alignment_bits(opc);
268
- if (a_bits) {
269
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
270
+ if (ldst) {
271
+ ldst->type = data_type;
272
+ ldst->datalo_reg = data_reg;
273
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
38
}
274
}
39
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
275
- h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
40
276
-#endif
41
static bool fold_mul(OptContext *ctx, TCGOp *op)
277
-
42
{
278
- h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
43
- return fold_const2(ctx, op);
279
- tcg_out_qemu_ld_indexed(s, opc, data_type, data_reg, h);
44
+ if (fold_const2(ctx, op) ||
280
-
45
+ fold_xi_to_i(ctx, op, 0)) {
281
-#ifdef CONFIG_SOFTMMU
46
+ return true;
282
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
47
+ }
283
- s->code_ptr, label_ptr);
48
+ return false;
284
-#endif
49
}
285
}
50
286
51
static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
287
static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc,
52
{
288
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_indexed(TCGContext *s, MemOp opc,
53
- return fold_const2(ctx, op);
289
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
54
+ if (fold_const2(ctx, op) ||
290
MemOpIdx oi, TCGType data_type)
55
+ fold_xi_to_i(ctx, op, 0)) {
291
{
56
+ return true;
292
- MemOp opc = get_memop(oi);
57
+ }
293
+ TCGLabelQemuLdst *ldst;
58
+ return false;
294
HostAddress h;
295
296
-#ifdef CONFIG_SOFTMMU
297
- tcg_insn_unit *label_ptr[1];
298
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
299
+ tcg_out_qemu_st_indexed(s, get_memop(oi), data_reg, h);
300
301
- tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
302
- h.index = TCG_REG_TMP2;
303
-#else
304
- unsigned a_bits = get_alignment_bits(opc);
305
- if (a_bits) {
306
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
307
+ if (ldst) {
308
+ ldst->type = data_type;
309
+ ldst->datalo_reg = data_reg;
310
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
311
}
312
- h.index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
313
-#endif
314
-
315
- h.base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
316
- tcg_out_qemu_st_indexed(s, opc, data_reg, h);
317
-
318
-#ifdef CONFIG_SOFTMMU
319
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
320
- s->code_ptr, label_ptr);
321
-#endif
59
}
322
}
60
323
61
static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
324
/*
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
63
continue;
64
}
65
66
- /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
67
- switch (opc) {
68
- CASE_OP_32_64_VEC(and):
69
- CASE_OP_32_64_VEC(mul):
70
- CASE_OP_32_64(muluh):
71
- CASE_OP_32_64(mulsh):
72
- if (arg_is_const(op->args[2])
73
- && arg_info(op->args[2])->val == 0) {
74
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
75
- continue;
76
- }
77
- break;
78
- default:
79
- break;
80
- }
81
-
82
/*
83
* Process each opcode.
84
* Sorted alphabetically by opcode as much as possible.
85
--
325
--
86
2.25.1
326
2.34.1
87
327
88
328
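Read together, the qemu_ld hunks above reduce the load path to roughly the shape below (a consolidated sketch of the + lines only, not a complete listing; the store side is symmetric, with is_ld = false and tcg_out_qemu_st_indexed):

    static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
                                MemOpIdx oi, TCGType data_type)
    {
        TCGLabelQemuLdst *ldst;
        HostAddress h;

        /* TLB load + compare (softmmu) or alignment test (user-only). */
        ldst = prepare_host_addr(s, &h, addr_reg, oi, true);

        /* Fast path uses the host address returned in h. */
        tcg_out_qemu_ld_indexed(s, get_memop(oi), data_type, data_reg, h);

        if (ldst) {
            /* Finish the slow-path descriptor for the out-of-line helper. */
            ldst->type = data_type;
            ldst->datalo_reg = data_reg;
            ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
        }
    }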
1
Pull the "op r, a, i => mov r, a" optimization into a function,
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
2
and use it in the outer-most logical operations.
2
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
3
into one function that returns HostAddress and TCGLabelQemuLdst structures.
3
4
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
tcg/optimize.c | 61 +++++++++++++++++++++-----------------------------
8
tcg/mips/tcg-target.c.inc | 404 ++++++++++++++++----------------------
8
1 file changed, 26 insertions(+), 35 deletions(-)
9
1 file changed, 172 insertions(+), 232 deletions(-)
9
10
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
--- a/tcg/mips/tcg-target.c.inc
13
+++ b/tcg/optimize.c
14
+++ b/tcg/mips/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
15
@@ -XXX,XX +XXX,XX @@ static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
15
return false;
16
return i;
16
}
17
}
17
18
18
+/* If the binary operation has second argument @i, fold to identity. */
19
-/* We expect to use a 16-bit negative offset from ENV. */
19
+static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
20
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
22
-
23
-/*
24
- * Perform the tlb comparison operation.
25
- * The complete host address is placed in BASE.
26
- * Clobbers TMP0, TMP1, TMP2, TMP3.
27
- */
28
-static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
29
- TCGReg addrh, MemOpIdx oi,
30
- tcg_insn_unit *label_ptr[2], bool is_load)
31
-{
32
- MemOp opc = get_memop(oi);
33
- unsigned a_bits = get_alignment_bits(opc);
34
- unsigned s_bits = opc & MO_SIZE;
35
- unsigned a_mask = (1 << a_bits) - 1;
36
- unsigned s_mask = (1 << s_bits) - 1;
37
- int mem_index = get_mmuidx(oi);
38
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
39
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
40
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
41
- int add_off = offsetof(CPUTLBEntry, addend);
42
- int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
43
- : offsetof(CPUTLBEntry, addr_write));
44
- target_ulong tlb_mask;
45
-
46
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
47
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
48
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
49
-
50
- /* Extract the TLB index from the address into TMP3. */
51
- tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrl,
52
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
53
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
54
-
55
- /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
56
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
57
-
58
- /* Load the (low-half) tlb comparator. */
59
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
60
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
61
- } else {
62
- tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
63
- : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
64
- TCG_TMP0, TCG_TMP3, cmp_off);
65
- }
66
-
67
- /* Zero extend a 32-bit guest address for a 64-bit host. */
68
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
69
- tcg_out_ext32u(s, base, addrl);
70
- addrl = base;
71
- }
72
-
73
- /*
74
- * Mask the page bits, keeping the alignment bits to compare against.
75
- * For unaligned accesses, compare against the end of the access to
76
- * verify that it does not cross a page boundary.
77
- */
78
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
79
- tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
80
- if (a_mask >= s_mask) {
81
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
82
- } else {
83
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrl, s_mask - a_mask);
84
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
85
- }
86
-
87
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
88
- /* Load the tlb addend for the fast path. */
89
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
90
- }
91
-
92
- label_ptr[0] = s->code_ptr;
93
- tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
94
-
95
- /* Load and test the high half tlb comparator. */
96
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
97
- /* delay slot */
98
- tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
99
-
100
- /* Load the tlb addend for the fast path. */
101
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
102
-
103
- label_ptr[1] = s->code_ptr;
104
- tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
105
- }
106
-
107
- /* delay slot */
108
- tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrl);
109
-}
110
-
111
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
112
- TCGType ext,
113
- TCGReg datalo, TCGReg datahi,
114
- TCGReg addrlo, TCGReg addrhi,
115
- void *raddr, tcg_insn_unit *label_ptr[2])
116
-{
117
- TCGLabelQemuLdst *label = new_ldst_label(s);
118
-
119
- label->is_ld = is_ld;
120
- label->oi = oi;
121
- label->type = ext;
122
- label->datalo_reg = datalo;
123
- label->datahi_reg = datahi;
124
- label->addrlo_reg = addrlo;
125
- label->addrhi_reg = addrhi;
126
- label->raddr = tcg_splitwx_to_rx(raddr);
127
- label->label_ptr[0] = label_ptr[0];
128
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
129
- label->label_ptr[1] = label_ptr[1];
130
- }
131
-}
132
-
133
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
134
{
135
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
136
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
137
}
138
139
#else
140
-
141
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
142
- TCGReg addrhi, unsigned a_bits)
143
-{
144
- unsigned a_mask = (1 << a_bits) - 1;
145
- TCGLabelQemuLdst *l = new_ldst_label(s);
146
-
147
- l->is_ld = is_ld;
148
- l->addrlo_reg = addrlo;
149
- l->addrhi_reg = addrhi;
150
-
151
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
152
- tcg_debug_assert(a_bits < 16);
153
- tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
154
-
155
- l->label_ptr[0] = s->code_ptr;
156
- if (use_mips32r6_instructions) {
157
- tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
158
- } else {
159
- tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
160
- tcg_out_nop(s);
161
- }
162
-
163
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
164
-}
165
-
166
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
167
{
168
void *target;
169
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
170
}
171
#endif /* SOFTMMU */
172
173
+typedef struct {
174
+ TCGReg base;
175
+ MemOp align;
176
+} HostAddress;
177
+
178
+/*
179
+ * For softmmu, perform the TLB load and compare.
180
+ * For useronly, perform any required alignment tests.
181
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
182
+ * is required and fill in @h with the host address for the fast path.
183
+ */
184
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
185
+ TCGReg addrlo, TCGReg addrhi,
186
+ MemOpIdx oi, bool is_ld)
20
+{
187
+{
21
+ if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
188
+ TCGLabelQemuLdst *ldst = NULL;
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
189
+ MemOp opc = get_memop(oi);
23
+ }
190
+ unsigned a_bits = get_alignment_bits(opc);
24
+ return false;
191
+ unsigned s_bits = opc & MO_SIZE;
192
+ unsigned a_mask = (1 << a_bits) - 1;
193
+ TCGReg base;
194
+
195
+#ifdef CONFIG_SOFTMMU
196
+ unsigned s_mask = (1 << s_bits) - 1;
197
+ int mem_index = get_mmuidx(oi);
198
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
199
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
200
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
201
+ int add_off = offsetof(CPUTLBEntry, addend);
202
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
203
+ : offsetof(CPUTLBEntry, addr_write);
204
+ target_ulong tlb_mask;
205
+
206
+ ldst = new_ldst_label(s);
207
+ ldst->is_ld = is_ld;
208
+ ldst->oi = oi;
209
+ ldst->addrlo_reg = addrlo;
210
+ ldst->addrhi_reg = addrhi;
211
+ base = TCG_REG_A0;
212
+
213
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
214
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
215
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
216
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
217
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP1, TCG_AREG0, table_off);
218
+
219
+ /* Extract the TLB index from the address into TMP3. */
220
+ tcg_out_opc_sa(s, ALIAS_TSRL, TCG_TMP3, addrlo,
221
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
222
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP3, TCG_TMP3, TCG_TMP0);
223
+
224
+ /* Add the tlb_table pointer, creating the CPUTLBEntry address in TMP3. */
225
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, TCG_TMP1);
226
+
227
+ /* Load the (low-half) tlb comparator. */
228
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
229
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
230
+ } else {
231
+ tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
232
+ : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
233
+ TCG_TMP0, TCG_TMP3, cmp_off);
234
+ }
235
+
236
+ /* Zero extend a 32-bit guest address for a 64-bit host. */
237
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
238
+ tcg_out_ext32u(s, base, addrlo);
239
+ addrlo = base;
240
+ }
241
+
242
+ /*
243
+ * Mask the page bits, keeping the alignment bits to compare against.
244
+ * For unaligned accesses, compare against the end of the access to
245
+ * verify that it does not cross a page boundary.
246
+ */
247
+ tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
248
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
249
+ if (a_mask >= s_mask) {
250
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
251
+ } else {
252
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrlo, s_mask - a_mask);
253
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
254
+ }
255
+
256
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
257
+ /* Load the tlb addend for the fast path. */
258
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
259
+ }
260
+
261
+ ldst->label_ptr[0] = s->code_ptr;
262
+ tcg_out_opc_br(s, OPC_BNE, TCG_TMP1, TCG_TMP0);
263
+
264
+ /* Load and test the high half tlb comparator. */
265
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
266
+ /* delay slot */
267
+ tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
268
+
269
+ /* Load the tlb addend for the fast path. */
270
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
271
+
272
+ ldst->label_ptr[1] = s->code_ptr;
273
+ tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
274
+ }
275
+
276
+ /* delay slot */
277
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrlo);
278
+#else
279
+ if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
280
+ ldst = new_ldst_label(s);
281
+
282
+ ldst->is_ld = is_ld;
283
+ ldst->oi = oi;
284
+ ldst->addrlo_reg = addrlo;
285
+ ldst->addrhi_reg = addrhi;
286
+
287
+ /* We are expecting a_bits to max out at 7, much lower than ANDI. */
288
+ tcg_debug_assert(a_bits < 16);
289
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_TMP0, addrlo, a_mask);
290
+
291
+ ldst->label_ptr[0] = s->code_ptr;
292
+ if (use_mips32r6_instructions) {
293
+ tcg_out_opc_br(s, OPC_BNEZALC_R6, TCG_REG_ZERO, TCG_TMP0);
294
+ } else {
295
+ tcg_out_opc_br(s, OPC_BNEL, TCG_TMP0, TCG_REG_ZERO);
296
+ tcg_out_nop(s);
297
+ }
298
+ }
299
+
300
+ base = addrlo;
301
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
302
+ tcg_out_ext32u(s, TCG_REG_A0, base);
303
+ base = TCG_REG_A0;
304
+ }
305
+ if (guest_base) {
306
+ if (guest_base == (int16_t)guest_base) {
307
+ tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
308
+ } else {
309
+ tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
310
+ TCG_GUEST_BASE_REG);
311
+ }
312
+ base = TCG_REG_A0;
313
+ }
314
+#endif
315
+
316
+ h->base = base;
317
+ h->align = a_bits;
318
+ return ldst;
25
+}
319
+}
26
+
320
+
27
/* If the binary operation has second argument @i, fold to NOT. */
321
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
28
static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
322
TCGReg base, MemOp opc, TCGType type)
29
{
323
{
30
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
324
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
31
325
MemOpIdx oi, TCGType data_type)
32
static bool fold_add(OptContext *ctx, TCGOp *op)
33
{
326
{
34
- return fold_const2(ctx, op);
327
MemOp opc = get_memop(oi);
35
+ if (fold_const2(ctx, op) ||
328
- unsigned a_bits = get_alignment_bits(opc);
36
+ fold_xi_to_x(ctx, op, 0)) {
329
- unsigned s_bits = opc & MO_SIZE;
37
+ return true;
330
- TCGReg base;
38
+ }
331
+ TCGLabelQemuLdst *ldst;
39
+ return false;
332
+ HostAddress h;
333
334
- /*
335
- * R6 removes the left/right instructions but requires the
336
- * system to support misaligned memory accesses.
337
- */
338
-#if defined(CONFIG_SOFTMMU)
339
- tcg_insn_unit *label_ptr[2];
340
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
341
342
- base = TCG_REG_A0;
343
- tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 1);
344
- if (use_mips32r6_instructions || a_bits >= s_bits) {
345
- tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
346
+ if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
347
+ tcg_out_qemu_ld_direct(s, datalo, datahi, h.base, opc, data_type);
348
} else {
349
- tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type);
350
+ tcg_out_qemu_ld_unalign(s, datalo, datahi, h.base, opc, data_type);
351
}
352
- add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
353
- addrlo, addrhi, s->code_ptr, label_ptr);
354
-#else
355
- base = addrlo;
356
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
357
- tcg_out_ext32u(s, TCG_REG_A0, base);
358
- base = TCG_REG_A0;
359
+
360
+ if (ldst) {
361
+ ldst->type = data_type;
362
+ ldst->datalo_reg = datalo;
363
+ ldst->datahi_reg = datahi;
364
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
365
}
366
- if (guest_base) {
367
- if (guest_base == (int16_t)guest_base) {
368
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
369
- } else {
370
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
371
- TCG_GUEST_BASE_REG);
372
- }
373
- base = TCG_REG_A0;
374
- }
375
- if (use_mips32r6_instructions) {
376
- if (a_bits) {
377
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
378
- }
379
- tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
380
- } else {
381
- if (a_bits && a_bits != s_bits) {
382
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
383
- }
384
- if (a_bits >= s_bits) {
385
- tcg_out_qemu_ld_direct(s, datalo, datahi, base, opc, data_type);
386
- } else {
387
- tcg_out_qemu_ld_unalign(s, datalo, datahi, base, opc, data_type);
388
- }
389
- }
390
-#endif
40
}
391
}
41
392
42
static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
393
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
43
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
394
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
395
MemOpIdx oi, TCGType data_type)
44
{
396
{
45
if (fold_const2(ctx, op) ||
397
MemOp opc = get_memop(oi);
46
fold_xi_to_i(ctx, op, 0) ||
398
- unsigned a_bits = get_alignment_bits(opc);
47
+ fold_xi_to_x(ctx, op, -1) ||
399
- unsigned s_bits = opc & MO_SIZE;
48
fold_xx_to_x(ctx, op)) {
400
- TCGReg base;
49
return true;
401
+ TCGLabelQemuLdst *ldst;
402
+ HostAddress h;
403
404
- /*
405
- * R6 removes the left/right instructions but requires the
406
- * system to support misaligned memory accesses.
407
- */
408
-#if defined(CONFIG_SOFTMMU)
409
- tcg_insn_unit *label_ptr[2];
410
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
411
412
- base = TCG_REG_A0;
413
- tcg_out_tlb_load(s, base, addrlo, addrhi, oi, label_ptr, 0);
414
- if (use_mips32r6_instructions || a_bits >= s_bits) {
415
- tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
416
+ if (use_mips32r6_instructions || h.align >= (opc & MO_SIZE)) {
417
+ tcg_out_qemu_st_direct(s, datalo, datahi, h.base, opc);
418
} else {
419
- tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc);
420
+ tcg_out_qemu_st_unalign(s, datalo, datahi, h.base, opc);
50
}
421
}
51
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
422
- add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
52
{
423
- addrlo, addrhi, s->code_ptr, label_ptr);
53
if (fold_const2(ctx, op) ||
424
-#else
54
fold_xx_to_i(ctx, op, 0) ||
425
- base = addrlo;
55
+ fold_xi_to_x(ctx, op, 0) ||
426
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
56
fold_ix_to_not(ctx, op, -1)) {
427
- tcg_out_ext32u(s, TCG_REG_A0, base);
57
return true;
428
- base = TCG_REG_A0;
429
+
430
+ if (ldst) {
431
+ ldst->type = data_type;
432
+ ldst->datalo_reg = datalo;
433
+ ldst->datahi_reg = datahi;
434
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
58
}
435
}
59
@@ -XXX,XX +XXX,XX @@ static bool fold_dup2(OptContext *ctx, TCGOp *op)
436
- if (guest_base) {
60
static bool fold_eqv(OptContext *ctx, TCGOp *op)
437
- if (guest_base == (int16_t)guest_base) {
61
{
438
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_A0, base, guest_base);
62
if (fold_const2(ctx, op) ||
439
- } else {
63
+ fold_xi_to_x(ctx, op, -1) ||
440
- tcg_out_opc_reg(s, ALIAS_PADD, TCG_REG_A0, base,
64
fold_xi_to_not(ctx, op, 0)) {
441
- TCG_GUEST_BASE_REG);
65
return true;
442
- }
66
}
443
- base = TCG_REG_A0;
67
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
444
- }
68
static bool fold_or(OptContext *ctx, TCGOp *op)
445
- if (use_mips32r6_instructions) {
69
{
446
- if (a_bits) {
70
if (fold_const2(ctx, op) ||
447
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
71
+ fold_xi_to_x(ctx, op, 0) ||
448
- }
72
fold_xx_to_x(ctx, op)) {
449
- tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
73
return true;
450
- } else {
74
}
451
- if (a_bits && a_bits != s_bits) {
75
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
452
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
76
static bool fold_orc(OptContext *ctx, TCGOp *op)
453
- }
77
{
454
- if (a_bits >= s_bits) {
78
if (fold_const2(ctx, op) ||
455
- tcg_out_qemu_st_direct(s, datalo, datahi, base, opc);
79
+ fold_xi_to_x(ctx, op, -1) ||
456
- } else {
80
fold_ix_to_not(ctx, op, 0)) {
457
- tcg_out_qemu_st_unalign(s, datalo, datahi, base, opc);
81
return true;
458
- }
82
}
459
- }
83
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
460
-#endif
84
85
static bool fold_shift(OptContext *ctx, TCGOp *op)
86
{
87
- return fold_const2(ctx, op);
88
+ if (fold_const2(ctx, op) ||
89
+ fold_xi_to_x(ctx, op, 0)) {
90
+ return true;
91
+ }
92
+ return false;
93
}
461
}
94
462
95
static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
463
static void tcg_out_mb(TCGContext *s, TCGArg a0)
96
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
97
{
98
if (fold_const2(ctx, op) ||
99
fold_xx_to_i(ctx, op, 0) ||
100
+ fold_xi_to_x(ctx, op, 0) ||
101
fold_sub_to_neg(ctx, op)) {
102
return true;
103
}
104
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
105
{
106
if (fold_const2(ctx, op) ||
107
fold_xx_to_i(ctx, op, 0) ||
108
+ fold_xi_to_x(ctx, op, 0) ||
109
fold_xi_to_not(ctx, op, -1)) {
110
return true;
111
}
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
break;
114
}
115
116
- /* Simplify expression for "op r, a, const => mov r, a" cases */
117
- switch (opc) {
118
- CASE_OP_32_64_VEC(add):
119
- CASE_OP_32_64_VEC(sub):
120
- CASE_OP_32_64_VEC(or):
121
- CASE_OP_32_64_VEC(xor):
122
- CASE_OP_32_64_VEC(andc):
123
- CASE_OP_32_64(shl):
124
- CASE_OP_32_64(shr):
125
- CASE_OP_32_64(sar):
126
- CASE_OP_32_64(rotl):
127
- CASE_OP_32_64(rotr):
128
- if (!arg_is_const(op->args[1])
129
- && arg_is_const(op->args[2])
130
- && arg_info(op->args[2])->val == 0) {
131
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
132
- continue;
133
- }
134
- break;
135
- CASE_OP_32_64_VEC(and):
136
- CASE_OP_32_64_VEC(orc):
137
- CASE_OP_32_64(eqv):
138
- if (!arg_is_const(op->args[1])
139
- && arg_is_const(op->args[2])
140
- && arg_info(op->args[2])->val == -1) {
141
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
142
- continue;
143
- }
144
- break;
145
- default:
146
- break;
147
- }
148
-
149
/* Simplify using known-zero bits. Currently only ops with a single
150
output argument is supported. */
151
z_mask = -1;
152
--
464
--
153
2.25.1
465
2.34.1
154
466
155
467
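For reference, the tcg/optimize.c half of the hunks above boils down to the helper below plus calls from the outer folds (a sketch assembled from the + lines; @i is 0 for add, sub, or, xor, andc and the shifts, and -1 for and, orc, eqv):

    /* If the binary operation has second argument @i, fold to identity. */
    static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
    {
        if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
            return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
        }
        return false;
    }

    static bool fold_add(OptContext *ctx, TCGOp *op)
    {
        if (fold_const2(ctx, op) ||
            fold_xi_to_x(ctx, op, 0)) {    /* add r, a, 0 => mov r, a */
            return true;
        }
        return false;
    }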
1
For constant shifts, we can simply shift the s_mask.
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
2
2
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
3
For variable shifts, we know that sar does not reduce
3
into one function that returns HostAddress and TCGLabelQemuLdst structures.
4
the s_mask, which helps for sequences like
5
6
ext32s_i64 t, in
7
sar_i64 t, t, v
8
ext32s_i64 out, t
9
10
allowing the final extend to be eliminated.
11
4
12
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
13
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
14
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
15
---
7
---
16
tcg/optimize.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++---
8
tcg/ppc/tcg-target.c.inc | 381 ++++++++++++++++++---------------------
17
1 file changed, 47 insertions(+), 3 deletions(-)
9
1 file changed, 172 insertions(+), 209 deletions(-)
18
10
19
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
20
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
21
--- a/tcg/optimize.c
13
--- a/tcg/ppc/tcg-target.c.inc
22
+++ b/tcg/optimize.c
14
+++ b/tcg/ppc/tcg-target.c.inc
23
@@ -XXX,XX +XXX,XX @@ static uint64_t smask_from_zmask(uint64_t zmask)
15
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
24
return ~(~0ull >> rep);
16
[MO_BEUQ] = helper_be_stq_mmu,
17
};
18
19
-/* We expect to use a 16-bit negative offset from ENV. */
20
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
22
-
23
-/* Perform the TLB load and compare. Places the result of the comparison
24
- in CR7, loads the addend of the TLB into R3, and returns the register
25
- containing the guest address (zero-extended into R4). Clobbers R0 and R2. */
26
-
27
-static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
28
- TCGReg addrlo, TCGReg addrhi,
29
- int mem_index, bool is_read)
30
-{
31
- int cmp_off
32
- = (is_read
33
- ? offsetof(CPUTLBEntry, addr_read)
34
- : offsetof(CPUTLBEntry, addr_write));
35
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
36
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
37
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
38
- unsigned s_bits = opc & MO_SIZE;
39
- unsigned a_bits = get_alignment_bits(opc);
40
-
41
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
42
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
43
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
44
-
45
- /* Extract the page index, shifted into place for tlb index. */
46
- if (TCG_TARGET_REG_BITS == 32) {
47
- tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
48
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
49
- } else {
50
- tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
51
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
52
- }
53
- tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
54
-
55
- /* Load the TLB comparator. */
56
- if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
57
- uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
58
- ? LWZUX : LDUX);
59
- tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
60
- } else {
61
- tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
62
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
63
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
64
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
65
- } else {
66
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
67
- }
68
- }
69
-
70
- /* Load the TLB addend for use on the fast path. Do this asap
71
- to minimize any load use delay. */
72
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
73
- offsetof(CPUTLBEntry, addend));
74
-
75
- /* Clear the non-page, non-alignment bits from the address */
76
- if (TCG_TARGET_REG_BITS == 32) {
77
- /* We don't support unaligned accesses on 32-bits.
78
- * Preserve the bottom bits and thus trigger a comparison
79
- * failure on unaligned accesses.
80
- */
81
- if (a_bits < s_bits) {
82
- a_bits = s_bits;
83
- }
84
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
85
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
86
- } else {
87
- TCGReg t = addrlo;
88
-
89
- /* If the access is unaligned, we need to make sure we fail if we
90
- * cross a page boundary. The trick is to add the access size-1
91
- * to the address before masking the low bits. That will make the
92
- * address overflow to the next page if we cross a page boundary,
93
- * which will then force a mismatch of the TLB compare.
94
- */
95
- if (a_bits < s_bits) {
96
- unsigned a_mask = (1 << a_bits) - 1;
97
- unsigned s_mask = (1 << s_bits) - 1;
98
- tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
99
- t = TCG_REG_R0;
100
- }
101
-
102
- /* Mask the address for the requested alignment. */
103
- if (TARGET_LONG_BITS == 32) {
104
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
105
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
106
- /* Zero-extend the address for use in the final address. */
107
- tcg_out_ext32u(s, TCG_REG_R4, addrlo);
108
- addrlo = TCG_REG_R4;
109
- } else if (a_bits == 0) {
110
- tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
111
- } else {
112
- tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
113
- 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
114
- tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
115
- }
116
- }
117
-
118
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
119
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
120
- 0, 7, TCG_TYPE_I32);
121
- tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
122
- tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
123
- } else {
124
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
125
- 0, 7, TCG_TYPE_TL);
126
- }
127
-
128
- return addrlo;
129
-}
130
-
131
-/* Record the context of a call to the out of line helper code for the slow
132
- path for a load or store, so that we can later generate the correct
133
- helper code. */
134
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
135
- TCGType type, MemOpIdx oi,
136
- TCGReg datalo_reg, TCGReg datahi_reg,
137
- TCGReg addrlo_reg, TCGReg addrhi_reg,
138
- tcg_insn_unit *raddr, tcg_insn_unit *lptr)
139
-{
140
- TCGLabelQemuLdst *label = new_ldst_label(s);
141
-
142
- label->is_ld = is_ld;
143
- label->type = type;
144
- label->oi = oi;
145
- label->datalo_reg = datalo_reg;
146
- label->datahi_reg = datahi_reg;
147
- label->addrlo_reg = addrlo_reg;
148
- label->addrhi_reg = addrhi_reg;
149
- label->raddr = tcg_splitwx_to_rx(raddr);
150
- label->label_ptr[0] = lptr;
151
-}
152
-
153
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
154
{
155
MemOpIdx oi = lb->oi;
156
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
157
return true;
25
}
158
}
159
#else
160
-
161
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
162
- TCGReg addrhi, unsigned a_bits)
163
-{
164
- unsigned a_mask = (1 << a_bits) - 1;
165
- TCGLabelQemuLdst *label = new_ldst_label(s);
166
-
167
- label->is_ld = is_ld;
168
- label->addrlo_reg = addrlo;
169
- label->addrhi_reg = addrhi;
170
-
171
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
172
- tcg_debug_assert(a_bits < 16);
173
- tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask));
174
-
175
- label->label_ptr[0] = s->code_ptr;
176
- tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
177
-
178
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
179
-}
180
-
181
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
182
{
183
if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
184
@@ -XXX,XX +XXX,XX @@ typedef struct {
185
TCGReg index;
186
} HostAddress;
26
187
27
+/*
188
+/*
28
+ * Recreate a properly left-aligned smask after manipulation.
189
+ * For softmmu, perform the TLB load and compare.
29
+ * Some bit-shuffling, particularly shifts and rotates, may
190
+ * For useronly, perform any required alignment tests.
30
+ * retain sign bits on the left, but may scatter disconnected
191
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
31
+ * sign bits on the right. Retain only what remains to the left.
192
+ * is required and fill in @h with the host address for the fast path.
32
+ */
193
+ */
33
+static uint64_t smask_from_smask(int64_t smask)
194
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
195
+ TCGReg addrlo, TCGReg addrhi,
196
+ MemOpIdx oi, bool is_ld)
34
+{
197
+{
35
+ /* Only the 1 bits are significant for smask */
198
+ TCGLabelQemuLdst *ldst = NULL;
36
+ return smask_from_zmask(~smask);
199
+ MemOp opc = get_memop(oi);
200
+ unsigned a_bits = get_alignment_bits(opc);
201
+
202
+#ifdef CONFIG_SOFTMMU
203
+ int mem_index = get_mmuidx(oi);
204
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
205
+ : offsetof(CPUTLBEntry, addr_write);
206
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
207
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
208
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
209
+ unsigned s_bits = opc & MO_SIZE;
210
+
211
+ ldst = new_ldst_label(s);
212
+ ldst->is_ld = is_ld;
213
+ ldst->oi = oi;
214
+ ldst->addrlo_reg = addrlo;
215
+ ldst->addrhi_reg = addrhi;
216
+
217
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
218
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
219
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
220
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
221
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
222
+
223
+ /* Extract the page index, shifted into place for tlb index. */
224
+ if (TCG_TARGET_REG_BITS == 32) {
225
+ tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
226
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
227
+ } else {
228
+ tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
229
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
230
+ }
231
+ tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
232
+
233
+ /* Load the TLB comparator. */
234
+ if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
235
+ uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
236
+ ? LWZUX : LDUX);
237
+ tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
238
+ } else {
239
+ tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
240
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
241
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
242
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
243
+ } else {
244
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
245
+ }
246
+ }
247
+
248
+ /*
249
+ * Load the TLB addend for use on the fast path.
250
+ * Do this asap to minimize any load use delay.
251
+ */
252
+ h->base = TCG_REG_R3;
253
+ tcg_out_ld(s, TCG_TYPE_PTR, h->base, TCG_REG_R3,
254
+ offsetof(CPUTLBEntry, addend));
255
+
256
+ /* Clear the non-page, non-alignment bits from the address */
257
+ if (TCG_TARGET_REG_BITS == 32) {
258
+ /*
259
+ * We don't support unaligned accesses on 32-bits.
260
+ * Preserve the bottom bits and thus trigger a comparison
261
+ * failure on unaligned accesses.
262
+ */
263
+ if (a_bits < s_bits) {
264
+ a_bits = s_bits;
265
+ }
266
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
267
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
268
+ } else {
269
+ TCGReg t = addrlo;
270
+
271
+ /*
272
+ * If the access is unaligned, we need to make sure we fail if we
273
+ * cross a page boundary. The trick is to add the access size-1
274
+ * to the address before masking the low bits. That will make the
275
+ * address overflow to the next page if we cross a page boundary,
276
+ * which will then force a mismatch of the TLB compare.
277
+ */
278
+ if (a_bits < s_bits) {
279
+ unsigned a_mask = (1 << a_bits) - 1;
280
+ unsigned s_mask = (1 << s_bits) - 1;
281
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
282
+ t = TCG_REG_R0;
283
+ }
284
+
285
+ /* Mask the address for the requested alignment. */
286
+ if (TARGET_LONG_BITS == 32) {
287
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
288
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
289
+ /* Zero-extend the address for use in the final address. */
290
+ tcg_out_ext32u(s, TCG_REG_R4, addrlo);
291
+ addrlo = TCG_REG_R4;
292
+ } else if (a_bits == 0) {
293
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
294
+ } else {
295
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
296
+ 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
297
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
298
+ }
299
+ }
300
+ h->index = addrlo;
301
+
302
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
303
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
304
+ 0, 7, TCG_TYPE_I32);
305
+ tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
306
+ tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
307
+ } else {
308
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
309
+ 0, 7, TCG_TYPE_TL);
310
+ }
311
+
312
+ /* Load a pointer into the current opcode w/conditional branch-link. */
313
+ ldst->label_ptr[0] = s->code_ptr;
314
+ tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
315
+#else
316
+ if (a_bits) {
317
+ ldst = new_ldst_label(s);
318
+ ldst->is_ld = is_ld;
319
+ ldst->oi = oi;
320
+ ldst->addrlo_reg = addrlo;
321
+ ldst->addrhi_reg = addrhi;
322
+
323
+ /* We are expecting a_bits to max out at 7, much lower than ANDI. */
324
+ tcg_debug_assert(a_bits < 16);
325
+ tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
326
+
327
+ ldst->label_ptr[0] = s->code_ptr;
328
+ tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
329
+ }
330
+
331
+ h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
332
+ h->index = addrlo;
333
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
334
+ tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
335
+ h->index = TCG_REG_TMP1;
336
+ }
337
+#endif
338
+
339
+ return ldst;
37
+}
340
+}
38
+
341
+
39
static inline TempOptInfo *ts_info(TCGTemp *ts)
342
static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
343
TCGReg addrlo, TCGReg addrhi,
344
MemOpIdx oi, TCGType data_type)
40
{
345
{
41
return ts->state_ptr;
346
MemOp opc = get_memop(oi);
42
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
347
- MemOp s_bits = opc & MO_SIZE;
43
348
+ TCGLabelQemuLdst *ldst;
44
static bool fold_shift(OptContext *ctx, TCGOp *op)
349
HostAddress h;
350
351
-#ifdef CONFIG_SOFTMMU
352
- tcg_insn_unit *label_ptr;
353
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
354
355
- h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), true);
356
- h.base = TCG_REG_R3;
357
-
358
- /* Load a pointer into the current opcode w/conditional branch-link. */
359
- label_ptr = s->code_ptr;
360
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
361
-#else /* !CONFIG_SOFTMMU */
362
- unsigned a_bits = get_alignment_bits(opc);
363
- if (a_bits) {
364
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
365
- }
366
- h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
367
- h.index = addrlo;
368
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
369
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
370
- h.index = TCG_REG_TMP1;
371
- }
372
-#endif
373
-
374
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
375
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
376
if (opc & MO_BSWAP) {
377
tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
378
tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
379
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
380
}
381
}
382
383
-#ifdef CONFIG_SOFTMMU
384
- add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
385
- addrlo, addrhi, s->code_ptr, label_ptr);
386
-#endif
387
+ if (ldst) {
388
+ ldst->type = data_type;
389
+ ldst->datalo_reg = datalo;
390
+ ldst->datahi_reg = datahi;
391
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
392
+ }
393
}
394
395
static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
396
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
397
MemOpIdx oi, TCGType data_type)
45
{
398
{
46
+ uint64_t s_mask, z_mask, sign;
399
MemOp opc = get_memop(oi);
47
+
400
- MemOp s_bits = opc & MO_SIZE;
48
if (fold_const2(ctx, op) ||
401
+ TCGLabelQemuLdst *ldst;
49
fold_ix_to_i(ctx, op, 0) ||
402
HostAddress h;
50
fold_xi_to_x(ctx, op, 0)) {
403
51
return true;
404
-#ifdef CONFIG_SOFTMMU
405
- tcg_insn_unit *label_ptr;
406
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
407
408
- h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), false);
409
- h.base = TCG_REG_R3;
410
-
411
- /* Load a pointer into the current opcode w/conditional branch-link. */
412
- label_ptr = s->code_ptr;
413
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
414
-#else /* !CONFIG_SOFTMMU */
415
- unsigned a_bits = get_alignment_bits(opc);
416
- if (a_bits) {
417
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
418
- }
419
- h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
420
- h.index = addrlo;
421
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
422
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
423
- h.index = TCG_REG_TMP1;
424
- }
425
-#endif
426
-
427
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
428
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
429
if (opc & MO_BSWAP) {
430
tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
431
tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
432
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
433
}
52
}
434
}
53
435
54
+ s_mask = arg_info(op->args[1])->s_mask;
436
-#ifdef CONFIG_SOFTMMU
55
+ z_mask = arg_info(op->args[1])->z_mask;
437
- add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
56
+
438
- addrlo, addrhi, s->code_ptr, label_ptr);
57
if (arg_is_const(op->args[2])) {
439
-#endif
58
- ctx->z_mask = do_constant_folding(op->opc, ctx->type,
440
+ if (ldst) {
59
- arg_info(op->args[1])->z_mask,
441
+ ldst->type = data_type;
60
- arg_info(op->args[2])->val);
442
+ ldst->datalo_reg = datalo;
61
+ int sh = arg_info(op->args[2])->val;
443
+ ldst->datahi_reg = datahi;
62
+
444
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
63
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
445
+ }
64
+
65
+ s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
66
+ ctx->s_mask = smask_from_smask(s_mask);
67
+
68
return fold_masks(ctx, op);
69
}
70
+
71
+ switch (op->opc) {
72
+ CASE_OP_32_64(sar):
73
+ /*
74
+ * Arithmetic right shift will not reduce the number of
75
+ * input sign repetitions.
76
+ */
77
+ ctx->s_mask = s_mask;
78
+ break;
79
+ CASE_OP_32_64(shr):
80
+ /*
81
+ * If the sign bit is known zero, then logical right shift
82
+ * will not reduce the number of input sign repetitions.
83
+ */
84
+ sign = (s_mask & -s_mask) >> 1;
85
+ if (!(z_mask & sign)) {
86
+ ctx->s_mask = s_mask;
87
+ }
88
+ break;
89
+ default:
90
+ break;
91
+ }
92
+
93
return false;
94
}
446
}
95
447
448
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
96
--
449
--
97
2.25.1
450
2.34.1
98
451
99
452
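Putting the scattered optimize hunks above back together, the constant-shift case of fold_shift ends up roughly as below (a sketch; the variable-shift sar/shr switch that conditionally preserves s_mask is elided):

    static bool fold_shift(OptContext *ctx, TCGOp *op)
    {
        uint64_t s_mask, z_mask;

        if (fold_const2(ctx, op) ||
            fold_ix_to_i(ctx, op, 0) ||
            fold_xi_to_x(ctx, op, 0)) {
            return true;
        }

        s_mask = arg_info(op->args[1])->s_mask;
        z_mask = arg_info(op->args[1])->z_mask;

        if (arg_is_const(op->args[2])) {
            int sh = arg_info(op->args[2])->val;

            ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);

            /* The shifted sign repetitions may scatter; re-left-align them. */
            s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
            ctx->s_mask = smask_from_smask(s_mask);

            return fold_masks(ctx, op);
        }

        /* Variable shifts: see the sar/shr cases in the hunk above. */
        return false;
    }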
1
This is the final entry in the main switch that was in a
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
2
different form. After this, we have the option to convert
2
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
3
the switch into a function dispatch table.
3
into one function that returns TCGReg and TCGLabelQemuLdst.
4
4
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 27 ++++++++++++++-------------
8
tcg/riscv/tcg-target.c.inc | 253 +++++++++++++++++--------------------
10
1 file changed, 14 insertions(+), 13 deletions(-)
9
1 file changed, 114 insertions(+), 139 deletions(-)
11
10
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
13
--- a/tcg/riscv/tcg-target.c.inc
15
+++ b/tcg/optimize.c
14
+++ b/tcg/riscv/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
16
#endif
17
};
18
19
-/* We expect to use a 12-bit negative offset from ENV. */
20
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
22
-
23
static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
24
{
25
tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, 0);
26
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
27
tcg_debug_assert(ok);
28
}
29
30
-static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, MemOpIdx oi,
31
- tcg_insn_unit **label_ptr, bool is_load)
32
-{
33
- MemOp opc = get_memop(oi);
34
- unsigned s_bits = opc & MO_SIZE;
35
- unsigned a_bits = get_alignment_bits(opc);
36
- tcg_target_long compare_mask;
37
- int mem_index = get_mmuidx(oi);
38
- int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
39
- int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
40
- int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
41
- TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
42
-
43
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
44
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
45
-
46
- tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr,
47
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
48
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
49
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
50
-
51
- /* Load the tlb comparator and the addend. */
52
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
53
- is_load ? offsetof(CPUTLBEntry, addr_read)
54
- : offsetof(CPUTLBEntry, addr_write));
55
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
56
- offsetof(CPUTLBEntry, addend));
57
-
58
- /* We don't support unaligned accesses. */
59
- if (a_bits < s_bits) {
60
- a_bits = s_bits;
61
- }
62
- /* Clear the non-page, non-alignment bits from the address. */
63
- compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
64
- if (compare_mask == sextreg(compare_mask, 0, 12)) {
65
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr, compare_mask);
66
- } else {
67
- tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
68
- tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr);
69
- }
70
-
71
- /* Compare masked address with the TLB entry. */
72
- label_ptr[0] = s->code_ptr;
73
- tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
74
-
75
- /* TLB Hit - translate address using addend. */
76
- if (TARGET_LONG_BITS == 32) {
77
- tcg_out_ext32u(s, TCG_REG_TMP0, addr);
78
- addr = TCG_REG_TMP0;
79
- }
80
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr);
81
- return TCG_REG_TMP0;
82
-}
83
-
84
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, MemOpIdx oi,
85
- TCGType data_type, TCGReg data_reg,
86
- TCGReg addr_reg, void *raddr,
87
- tcg_insn_unit **label_ptr)
88
-{
89
- TCGLabelQemuLdst *label = new_ldst_label(s);
90
-
91
- label->is_ld = is_ld;
92
- label->oi = oi;
93
- label->type = data_type;
94
- label->datalo_reg = data_reg;
95
- label->addrlo_reg = addr_reg;
96
- label->raddr = tcg_splitwx_to_rx(raddr);
97
- label->label_ptr[0] = label_ptr[0];
98
-}
99
-
100
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
101
{
102
MemOpIdx oi = l->oi;
103
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
17
return true;
104
return true;
18
}
105
}
19
106
#else
20
+static bool fold_mov(OptContext *ctx, TCGOp *op)
107
-
108
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addr_reg,
109
- unsigned a_bits)
110
-{
111
- unsigned a_mask = (1 << a_bits) - 1;
112
- TCGLabelQemuLdst *l = new_ldst_label(s);
113
-
114
- l->is_ld = is_ld;
115
- l->addrlo_reg = addr_reg;
116
-
117
- /* We are expecting a_bits to max out at 7, so we can always use andi. */
118
- tcg_debug_assert(a_bits < 12);
119
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
120
-
121
- l->label_ptr[0] = s->code_ptr;
122
- tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
123
-
124
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
125
-}
126
-
127
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
128
{
129
/* resolve label address */
130
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
131
{
132
return tcg_out_fail_alignment(s, l);
133
}
134
-
135
#endif /* CONFIG_SOFTMMU */
136
137
+/*
138
+ * For softmmu, perform the TLB load and compare.
139
+ * For useronly, perform any required alignment tests.
140
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
141
+ * is required and fill in @h with the host address for the fast path.
142
+ */
143
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase,
144
+ TCGReg addr_reg, MemOpIdx oi,
145
+ bool is_ld)
21
+{
146
+{
22
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
147
+ TCGLabelQemuLdst *ldst = NULL;
148
+ MemOp opc = get_memop(oi);
149
+ unsigned a_bits = get_alignment_bits(opc);
150
+ unsigned a_mask = (1u << a_bits) - 1;
151
+
152
+#ifdef CONFIG_SOFTMMU
153
+ unsigned s_bits = opc & MO_SIZE;
154
+ int mem_index = get_mmuidx(oi);
155
+ int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
156
+ int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
157
+ int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
158
+ TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
159
+ tcg_target_long compare_mask;
160
+
161
+ ldst = new_ldst_label(s);
162
+ ldst->is_ld = is_ld;
163
+ ldst->oi = oi;
164
+ ldst->addrlo_reg = addr_reg;
165
+
166
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
167
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
168
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, mask_base, mask_ofs);
169
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, table_base, table_ofs);
170
+
171
+ tcg_out_opc_imm(s, OPC_SRLI, TCG_REG_TMP2, addr_reg,
172
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
173
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
174
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
175
+
176
+ /* Load the tlb comparator and the addend. */
177
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
178
+ is_ld ? offsetof(CPUTLBEntry, addr_read)
179
+ : offsetof(CPUTLBEntry, addr_write));
180
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
181
+ offsetof(CPUTLBEntry, addend));
182
+
183
+ /* We don't support unaligned accesses. */
184
+ if (a_bits < s_bits) {
185
+ a_bits = s_bits;
186
+ }
187
+ /* Clear the non-page, non-alignment bits from the address. */
188
+ compare_mask = (tcg_target_long)TARGET_PAGE_MASK | a_mask;
189
+ if (compare_mask == sextreg(compare_mask, 0, 12)) {
190
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, compare_mask);
191
+ } else {
192
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
193
+ tcg_out_opc_reg(s, OPC_AND, TCG_REG_TMP1, TCG_REG_TMP1, addr_reg);
194
+ }
195
+
196
+ /* Compare masked address with the TLB entry. */
197
+ ldst->label_ptr[0] = s->code_ptr;
198
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0);
199
+
200
+ /* TLB Hit - translate address using addend. */
201
+ if (TARGET_LONG_BITS == 32) {
202
+ tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg);
203
+ addr_reg = TCG_REG_TMP0;
204
+ }
205
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_reg);
206
+ *pbase = TCG_REG_TMP0;
207
+#else
208
+ if (a_mask) {
209
+ ldst = new_ldst_label(s);
210
+ ldst->is_ld = is_ld;
211
+ ldst->oi = oi;
212
+ ldst->addrlo_reg = addr_reg;
213
+
214
+ /* We are expecting a_bits max 7, so we can always use andi. */
215
+ tcg_debug_assert(a_bits < 12);
216
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_TMP1, addr_reg, a_mask);
217
+
218
+ ldst->label_ptr[0] = s->code_ptr;
219
+ tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0);
220
+ }
221
+
222
+ TCGReg base = addr_reg;
223
+ if (TARGET_LONG_BITS == 32) {
224
+ tcg_out_ext32u(s, TCG_REG_TMP0, base);
225
+ base = TCG_REG_TMP0;
226
+ }
227
+ if (guest_base != 0) {
228
+ tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
229
+ base = TCG_REG_TMP0;
230
+ }
231
+ *pbase = base;
232
+#endif
233
+
234
+ return ldst;
23
+}
235
+}
24
+
236
+
25
static bool fold_movcond(OptContext *ctx, TCGOp *op)
237
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
26
{
238
TCGReg base, MemOp opc, TCGType type)
27
TCGOpcode opc = op->opc;
239
{
28
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
240
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
29
break;
241
static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
30
}
242
MemOpIdx oi, TCGType data_type)
31
243
{
32
- /* Propagate constants through copy operations and do constant
244
- MemOp opc = get_memop(oi);
33
- folding. Constants will be substituted to arguments by register
245
+ TCGLabelQemuLdst *ldst;
34
- allocator where needed and possible. Also detect copies. */
246
TCGReg base;
35
+ /*
247
36
+ * Process each opcode.
248
-#if defined(CONFIG_SOFTMMU)
37
+ * Sorted alphabetically by opcode as much as possible.
249
- tcg_insn_unit *label_ptr[1];
38
+ */
250
+ ldst = prepare_host_addr(s, &base, addr_reg, oi, true);
39
switch (opc) {
251
+ tcg_out_qemu_ld_direct(s, data_reg, base, get_memop(oi), data_type);
40
- CASE_OP_32_64_VEC(mov):
252
41
- done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
253
- base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
42
- break;
254
- tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
43
-
255
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
44
- default:
256
- s->code_ptr, label_ptr);
45
- break;
257
-#else
46
-
258
- unsigned a_bits = get_alignment_bits(opc);
47
- /* ---------------------------------------------------------- */
259
- if (a_bits) {
48
- /* Sorted alphabetically by opcode as much as possible. */
260
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
49
-
261
+ if (ldst) {
50
CASE_OP_32_64_VEC(add):
262
+ ldst->type = data_type;
51
done = fold_add(&ctx, op);
263
+ ldst->datalo_reg = data_reg;
52
break;
264
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
53
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
265
}
54
case INDEX_op_mb:
266
- base = addr_reg;
55
done = fold_mb(&ctx, op);
267
- if (TARGET_LONG_BITS == 32) {
56
break;
268
- tcg_out_ext32u(s, TCG_REG_TMP0, base);
57
+ CASE_OP_32_64_VEC(mov):
269
- base = TCG_REG_TMP0;
58
+ done = fold_mov(&ctx, op);
270
- }
59
+ break;
271
- if (guest_base != 0) {
60
CASE_OP_32_64(movcond):
272
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
61
done = fold_movcond(&ctx, op);
273
- base = TCG_REG_TMP0;
62
break;
274
- }
63
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
275
- tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
64
CASE_OP_32_64_VEC(xor):
276
-#endif
65
done = fold_xor(&ctx, op);
277
}
66
break;
278
67
+ default:
279
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
68
+ break;
280
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg val,
69
}
281
static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
70
282
MemOpIdx oi, TCGType data_type)
71
if (!done) {
283
{
284
- MemOp opc = get_memop(oi);
285
+ TCGLabelQemuLdst *ldst;
286
TCGReg base;
287
288
-#if defined(CONFIG_SOFTMMU)
289
- tcg_insn_unit *label_ptr[1];
290
+ ldst = prepare_host_addr(s, &base, addr_reg, oi, false);
291
+ tcg_out_qemu_st_direct(s, data_reg, base, get_memop(oi));
292
293
- base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
294
- tcg_out_qemu_st_direct(s, data_reg, base, opc);
295
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
296
- s->code_ptr, label_ptr);
297
-#else
298
- unsigned a_bits = get_alignment_bits(opc);
299
- if (a_bits) {
300
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
301
+ if (ldst) {
302
+ ldst->type = data_type;
303
+ ldst->datalo_reg = data_reg;
304
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
305
}
306
- base = addr_reg;
307
- if (TARGET_LONG_BITS == 32) {
308
- tcg_out_ext32u(s, TCG_REG_TMP0, base);
309
- base = TCG_REG_TMP0;
310
- }
311
- if (guest_base != 0) {
312
- tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
313
- base = TCG_REG_TMP0;
314
- }
315
- tcg_out_qemu_st_direct(s, data_reg, base, opc);
316
-#endif
317
}
318
319
static const tcg_insn_unit *tb_ret_addr;
72
--
320
--
73
2.25.1
321
2.34.1
74
322
75
323
diff view generated by jsdifflib
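The andi/bne pair on the user-only path of the RISC-V patch above is just an alignment test against a mask derived from the MemOp alignment bits. A minimal standalone sketch of that same check, in plain C with illustrative names rather than QEMU types:

    /* cc -o align align.c && ./align */
    #include <stdint.h>
    #include <stdio.h>

    /* 1 if the access must take the slow path, 0 if the fast path is fine. */
    static int needs_slow_path(uint64_t addr, unsigned a_bits)
    {
        uint64_t a_mask = (1u << a_bits) - 1;   /* a_bits = 3 -> mask 0x7 */
        return (addr & a_mask) != 0;
    }

    int main(void)
    {
        printf("%d\n", needs_slow_path(0x1000, 3));  /* 0: 8-byte aligned */
        printf("%d\n", needs_slow_path(0x1004, 3));  /* 1: misaligned */
        return 0;
    }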
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Merge tcg_out_tlb_load, add_qemu_ldst_label, tcg_out_test_alignment,
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
tcg_prepare_user_ldst, and some code that lived in both tcg_out_qemu_ld
3
and tcg_out_qemu_st into one function that returns HostAddress and
4
TCGLabelQemuLdst structures.
5
6
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
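The shape of that merge, reduced to a standalone sketch: one routine computes the fast-path host address and optionally hands back a slow-path label for the caller to complete. HostAddressSketch, LdstLabelSketch and the function names below are stand-ins for illustration only, not the real QEMU declarations.

    /* Compile check: cc -c sketch.c */
    #include <stddef.h>

    typedef struct { int base, index; long disp; } HostAddressSketch;
    typedef struct { int is_ld, data_reg; void *raddr; } LdstLabelSketch;

    /* Returns NULL when no slow path (TLB refill / alignment trap) is needed. */
    static LdstLabelSketch *prepare_host_addr_sketch(HostAddressSketch *h,
                                                     int addr_reg, int is_ld)
    {
        h->base = addr_reg;
        h->index = -1;
        h->disp = 0;
        (void)is_ld;
        return NULL;
    }

    void emit_qemu_ld_sketch(int data_reg, int addr_reg)
    {
        HostAddressSketch h;
        LdstLabelSketch *ldst = prepare_host_addr_sketch(&h, addr_reg, 1);

        /* ... emit the fast-path load from (h.base, h.index, h.disp) ... */

        if (ldst) {                 /* slow path requested: finish the label */
            ldst->data_reg = data_reg;
            ldst->raddr = NULL;     /* would be the current code pointer */
        }
    }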
4
---
8
---
5
tcg/optimize.c | 32 ++++++++++++++++++--------------
9
tcg/s390x/tcg-target.c.inc | 263 ++++++++++++++++---------------------
6
1 file changed, 18 insertions(+), 14 deletions(-)
10
1 file changed, 113 insertions(+), 150 deletions(-)
7
11
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
9
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
14
--- a/tcg/s390x/tcg-target.c.inc
11
+++ b/tcg/optimize.c
15
+++ b/tcg/s390x/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
17
}
18
19
#if defined(CONFIG_SOFTMMU)
20
-/* We're expecting to use a 20-bit negative offset on the tlb memory ops. */
21
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
22
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
23
-
24
-/* Load and compare a TLB entry, leaving the flags set. Loads the TLB
25
- addend into R2. Returns a register with the santitized guest address. */
26
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
27
- int mem_index, bool is_ld)
28
-{
29
- unsigned s_bits = opc & MO_SIZE;
30
- unsigned a_bits = get_alignment_bits(opc);
31
- unsigned s_mask = (1 << s_bits) - 1;
32
- unsigned a_mask = (1 << a_bits) - 1;
33
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
34
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
35
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
36
- int ofs, a_off;
37
- uint64_t tlb_mask;
38
-
39
- tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
40
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
41
- tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
42
- tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
43
-
44
- /* For aligned accesses, we check the first byte and include the alignment
45
- bits within the address. For unaligned access, we check that we don't
46
- cross pages using the address of the last byte of the access. */
47
- a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
48
- tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
49
- if (a_off == 0) {
50
- tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
51
- } else {
52
- tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
53
- tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
54
- }
55
-
56
- if (is_ld) {
57
- ofs = offsetof(CPUTLBEntry, addr_read);
58
- } else {
59
- ofs = offsetof(CPUTLBEntry, addr_write);
60
- }
61
- if (TARGET_LONG_BITS == 32) {
62
- tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
63
- } else {
64
- tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
65
- }
66
-
67
- tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
68
- offsetof(CPUTLBEntry, addend));
69
-
70
- if (TARGET_LONG_BITS == 32) {
71
- tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
72
- return TCG_REG_R3;
73
- }
74
- return addr_reg;
75
-}
76
-
77
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
78
- TCGType type, TCGReg data, TCGReg addr,
79
- tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
80
-{
81
- TCGLabelQemuLdst *label = new_ldst_label(s);
82
-
83
- label->is_ld = is_ld;
84
- label->oi = oi;
85
- label->type = type;
86
- label->datalo_reg = data;
87
- label->addrlo_reg = addr;
88
- label->raddr = tcg_splitwx_to_rx(raddr);
89
- label->label_ptr[0] = label_ptr;
90
-}
91
-
92
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
93
{
94
TCGReg addr_reg = lb->addrlo_reg;
95
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
13
return true;
96
return true;
14
}
97
}
15
98
#else
16
+static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
99
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
17
+{
100
- TCGReg addrlo, unsigned a_bits)
18
+ if (arg_is_const(op->args[1])) {
101
-{
19
+ uint64_t t = arg_info(op->args[1])->val;
102
- unsigned a_mask = (1 << a_bits) - 1;
20
+
103
- TCGLabelQemuLdst *l = new_ldst_label(s);
21
+ if (t != 0) {
104
-
22
+ t = do_constant_folding(op->opc, t, 0);
105
- l->is_ld = is_ld;
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
106
- l->addrlo_reg = addrlo;
24
+ }
107
-
25
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
108
- /* We are expecting a_bits to max out at 7, much lower than TMLL. */
26
+ }
109
- tcg_debug_assert(a_bits < 16);
27
+ return false;
110
- tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
28
+}
111
-
29
+
112
- tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
30
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
113
- l->label_ptr[0] = s->code_ptr;
31
{
114
- s->code_ptr += 1;
32
return fold_const1(ctx, op);
115
-
33
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
116
- l->raddr = tcg_splitwx_to_rx(s->code_ptr);
34
}
117
-}
35
break;
118
-
36
119
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
37
- CASE_OP_32_64(clz):
120
{
38
- CASE_OP_32_64(ctz):
121
if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
39
- if (arg_is_const(op->args[1])) {
122
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
40
- TCGArg v = arg_info(op->args[1])->val;
123
{
41
- if (v != 0) {
124
return tcg_out_fail_alignment(s, l);
42
- tmp = do_constant_folding(opc, v, 0);
125
}
43
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
126
+#endif /* CONFIG_SOFTMMU */
44
- } else {
127
45
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
128
-static HostAddress tcg_prepare_user_ldst(TCGContext *s, TCGReg addr_reg)
46
- }
129
+/*
47
- continue;
130
+ * For softmmu, perform the TLB load and compare.
48
- }
131
+ * For useronly, perform any required alignment tests.
49
- break;
132
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
50
-
133
+ * is required and fill in @h with the host address for the fast path.
51
default:
134
+ */
52
break;
135
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
53
136
+ TCGReg addr_reg, MemOpIdx oi,
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
137
+ bool is_ld)
55
case INDEX_op_brcond2_i32:
138
{
56
done = fold_brcond2(&ctx, op);
139
- TCGReg index;
57
break;
140
- int disp;
58
+ CASE_OP_32_64(clz):
141
+ TCGLabelQemuLdst *ldst = NULL;
59
+ CASE_OP_32_64(ctz):
142
+ MemOp opc = get_memop(oi);
60
+ done = fold_count_zeros(&ctx, op);
143
+ unsigned a_bits = get_alignment_bits(opc);
61
+ break;
144
+ unsigned a_mask = (1u << a_bits) - 1;
62
CASE_OP_32_64(ctpop):
145
63
done = fold_ctpop(&ctx, op);
146
+#ifdef CONFIG_SOFTMMU
64
break;
147
+ unsigned s_bits = opc & MO_SIZE;
148
+ unsigned s_mask = (1 << s_bits) - 1;
149
+ int mem_index = get_mmuidx(oi);
150
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
151
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
152
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
153
+ int ofs, a_off;
154
+ uint64_t tlb_mask;
155
+
156
+ ldst = new_ldst_label(s);
157
+ ldst->is_ld = is_ld;
158
+ ldst->oi = oi;
159
+ ldst->addrlo_reg = addr_reg;
160
+
161
+ tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
162
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
163
+
164
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
165
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
166
+ tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
167
+ tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
168
+
169
+ /*
170
+ * For aligned accesses, we check the first byte and include the alignment
171
+ * bits within the address. For unaligned access, we check that we don't
172
+ * cross pages using the address of the last byte of the access.
173
+ */
174
+ a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
175
+ tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
176
+ if (a_off == 0) {
177
+ tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
178
+ } else {
179
+ tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
180
+ tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
181
+ }
182
+
183
+ if (is_ld) {
184
+ ofs = offsetof(CPUTLBEntry, addr_read);
185
+ } else {
186
+ ofs = offsetof(CPUTLBEntry, addr_write);
187
+ }
188
+ if (TARGET_LONG_BITS == 32) {
189
+ tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
190
+ } else {
191
+ tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
192
+ }
193
+
194
+ tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
195
+ ldst->label_ptr[0] = s->code_ptr++;
196
+
197
+ h->index = TCG_REG_R2;
198
+ tcg_out_insn(s, RXY, LG, h->index, TCG_REG_R2, TCG_REG_NONE,
199
+ offsetof(CPUTLBEntry, addend));
200
+
201
+ h->base = addr_reg;
202
+ if (TARGET_LONG_BITS == 32) {
203
+ tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
204
+ h->base = TCG_REG_R3;
205
+ }
206
+ h->disp = 0;
207
+#else
208
+ if (a_mask) {
209
+ ldst = new_ldst_label(s);
210
+ ldst->is_ld = is_ld;
211
+ ldst->oi = oi;
212
+ ldst->addrlo_reg = addr_reg;
213
+
214
+ /* We are expecting a_bits to max out at 7, much lower than TMLL. */
215
+ tcg_debug_assert(a_bits < 16);
216
+ tcg_out_insn(s, RI, TMLL, addr_reg, a_mask);
217
+
218
+ tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
219
+ ldst->label_ptr[0] = s->code_ptr++;
220
+ }
221
+
222
+ h->base = addr_reg;
223
if (TARGET_LONG_BITS == 32) {
224
tcg_out_ext32u(s, TCG_TMP0, addr_reg);
225
- addr_reg = TCG_TMP0;
226
+ h->base = TCG_TMP0;
227
}
228
if (guest_base < 0x80000) {
229
- index = TCG_REG_NONE;
230
- disp = guest_base;
231
+ h->index = TCG_REG_NONE;
232
+ h->disp = guest_base;
233
} else {
234
- index = TCG_GUEST_BASE_REG;
235
- disp = 0;
236
+ h->index = TCG_GUEST_BASE_REG;
237
+ h->disp = 0;
238
}
239
- return (HostAddress){ .base = addr_reg, .index = index, .disp = disp };
240
+#endif
241
+
242
+ return ldst;
243
}
244
-#endif /* CONFIG_SOFTMMU */
245
246
static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
247
MemOpIdx oi, TCGType data_type)
248
{
249
- MemOp opc = get_memop(oi);
250
+ TCGLabelQemuLdst *ldst;
251
HostAddress h;
252
253
-#ifdef CONFIG_SOFTMMU
254
- unsigned mem_index = get_mmuidx(oi);
255
- tcg_insn_unit *label_ptr;
256
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, true);
257
+ tcg_out_qemu_ld_direct(s, get_memop(oi), data_reg, h);
258
259
- h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
260
- h.index = TCG_REG_R2;
261
- h.disp = 0;
262
-
263
- tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
264
- label_ptr = s->code_ptr;
265
- s->code_ptr += 1;
266
-
267
- tcg_out_qemu_ld_direct(s, opc, data_reg, h);
268
-
269
- add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
270
- s->code_ptr, label_ptr);
271
-#else
272
- unsigned a_bits = get_alignment_bits(opc);
273
-
274
- if (a_bits) {
275
- tcg_out_test_alignment(s, true, addr_reg, a_bits);
276
+ if (ldst) {
277
+ ldst->type = data_type;
278
+ ldst->datalo_reg = data_reg;
279
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
280
}
281
- h = tcg_prepare_user_ldst(s, addr_reg);
282
- tcg_out_qemu_ld_direct(s, opc, data_reg, h);
283
-#endif
284
}
285
286
static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
287
MemOpIdx oi, TCGType data_type)
288
{
289
- MemOp opc = get_memop(oi);
290
+ TCGLabelQemuLdst *ldst;
291
HostAddress h;
292
293
-#ifdef CONFIG_SOFTMMU
294
- unsigned mem_index = get_mmuidx(oi);
295
- tcg_insn_unit *label_ptr;
296
+ ldst = prepare_host_addr(s, &h, addr_reg, oi, false);
297
+ tcg_out_qemu_st_direct(s, get_memop(oi), data_reg, h);
298
299
- h.base = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
300
- h.index = TCG_REG_R2;
301
- h.disp = 0;
302
-
303
- tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
304
- label_ptr = s->code_ptr;
305
- s->code_ptr += 1;
306
-
307
- tcg_out_qemu_st_direct(s, opc, data_reg, h);
308
-
309
- add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
310
- s->code_ptr, label_ptr);
311
-#else
312
- unsigned a_bits = get_alignment_bits(opc);
313
-
314
- if (a_bits) {
315
- tcg_out_test_alignment(s, false, addr_reg, a_bits);
316
+ if (ldst) {
317
+ ldst->type = data_type;
318
+ ldst->datalo_reg = data_reg;
319
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
320
}
321
- h = tcg_prepare_user_ldst(s, addr_reg);
322
- tcg_out_qemu_st_direct(s, opc, data_reg, h);
323
-#endif
324
}
325
326
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
65
--
327
--
66
2.25.1
328
2.34.1
67
329
68
330
diff view generated by jsdifflib
1
Move all of the known-zero optimizations into the per-opcode
1
Add tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
functions. Use fold_masks when there is a possibility of the
2
and tcg_out_st_helper_args. These and their subroutines
3
result being determined, and simply set ctx->z_mask otherwise.
3
use the existing knowledge of the host function call ABI
4
to load the function call arguments and return results.
5
6
These will be used to simplify the backends in turn.
4
7
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
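For the fold_masks/ctx->z_mask bookkeeping mentioned above, the convention is that a clear bit in z_mask means the corresponding result bit is known to be zero. A minimal standalone illustration of how the masks combine for and/or, in plain C rather than QEMU code:

    /* cc -o zmask zmask.c && ./zmask */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* AND keeps a possibly-set bit only where both inputs may be 1. */
    static uint64_t z_mask_and(uint64_t z1, uint64_t z2) { return z1 & z2; }
    /* OR may produce a 1 wherever either input may be 1. */
    static uint64_t z_mask_or(uint64_t z1, uint64_t z2)  { return z1 | z2; }

    int main(void)
    {
        uint64_t z1 = 0xff;   /* operand known to fit in 8 bits */
        uint64_t z2 = 0x0f;   /* operand known to fit in 4 bits */

        printf("and: 0x%" PRIx64 "\n", z_mask_and(z1, z2));  /* 0xf  */
        printf("or:  0x%" PRIx64 "\n", z_mask_or(z1, z2));   /* 0xff */
        return 0;
    }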
8
---
10
---
9
tcg/optimize.c | 545 ++++++++++++++++++++++++++-----------------------
11
tcg/tcg.c | 475 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
10
1 file changed, 294 insertions(+), 251 deletions(-)
12
1 file changed, 471 insertions(+), 4 deletions(-)
11
13
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
diff --git a/tcg/tcg.c b/tcg/tcg.c
13
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
16
--- a/tcg/tcg.c
15
+++ b/tcg/optimize.c
17
+++ b/tcg/tcg.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
18
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
17
TCGTempSet temps_used;
19
static int tcg_out_ldst_finalize(TCGContext *s);
18
20
#endif
19
/* In flight values from optimization. */
21
20
- uint64_t z_mask;
22
+typedef struct TCGLdstHelperParam {
21
+ uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
23
+ TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
22
+ uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
24
+ unsigned ntmp;
23
TCGType type;
25
+ int tmp[3];
24
} OptContext;
26
+} TCGLdstHelperParam;
25
27
+
26
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
28
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
27
return false;
29
+ const TCGLdstHelperParam *p)
30
+ __attribute__((unused));
31
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
32
+ bool load_sign, const TCGLdstHelperParam *p)
33
+ __attribute__((unused));
34
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
35
+ const TCGLdstHelperParam *p)
36
+ __attribute__((unused));
37
+
38
TCGContext tcg_init_ctx;
39
__thread TCGContext *tcg_ctx;
40
41
@@ -XXX,XX +XXX,XX @@ void tcg_raise_tb_overflow(TCGContext *s)
42
siglongjmp(s->jmp_trans, -2);
28
}
43
}
29
44
30
+static bool fold_masks(OptContext *ctx, TCGOp *op)
45
+/*
31
+{
46
+ * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
32
+ uint64_t a_mask = ctx->a_mask;
47
+ * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
33
+ uint64_t z_mask = ctx->z_mask;
48
+ *
49
+ * However, tcg_out_helper_load_slots reuses this field to hold an
50
+ * argument slot number (which may designate a argument register or an
51
+ * argument stack slot), converting to TCGReg once all arguments that
52
+ * are destined for the stack are processed.
53
+ */
54
typedef struct TCGMovExtend {
55
- TCGReg dst;
56
+ unsigned dst;
57
TCGReg src;
58
TCGType dst_type;
59
TCGType src_type;
60
@@ -XXX,XX +XXX,XX @@ static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
61
* between the sources and destinations.
62
*/
63
64
-static void __attribute__((unused))
65
-tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
66
- const TCGMovExtend *i2, int scratch)
67
+static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
68
+ const TCGMovExtend *i2, int scratch)
69
{
70
TCGReg src1 = i1->src;
71
TCGReg src2 = i2->src;
72
@@ -XXX,XX +XXX,XX @@ static TCGHelperInfo all_helpers[] = {
73
};
74
static GHashTable *helper_table;
75
76
+/*
77
+ * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
78
+ * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
79
+ * We only use these for layout in tcg_out_ld_helper_ret and
80
+ * tcg_out_st_helper_args, and share them between several of
81
+ * the helpers, with the end result that it's easier to build manually.
82
+ */
83
+
84
+#if TCG_TARGET_REG_BITS == 32
85
+# define dh_typecode_ttl dh_typecode_i32
86
+#else
87
+# define dh_typecode_ttl dh_typecode_i64
88
+#endif
89
+
90
+static TCGHelperInfo info_helper_ld32_mmu = {
91
+ .flags = TCG_CALL_NO_WG,
92
+ .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
93
+ | dh_typemask(env, 1)
94
+ | dh_typemask(tl, 2) /* target_ulong addr */
95
+ | dh_typemask(i32, 3) /* unsigned oi */
96
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
97
+};
98
+
99
+static TCGHelperInfo info_helper_ld64_mmu = {
100
+ .flags = TCG_CALL_NO_WG,
101
+ .typemask = dh_typemask(i64, 0) /* return uint64_t */
102
+ | dh_typemask(env, 1)
103
+ | dh_typemask(tl, 2) /* target_ulong addr */
104
+ | dh_typemask(i32, 3) /* unsigned oi */
105
+ | dh_typemask(ptr, 4) /* uintptr_t ra */
106
+};
107
+
108
+static TCGHelperInfo info_helper_st32_mmu = {
109
+ .flags = TCG_CALL_NO_WG,
110
+ .typemask = dh_typemask(void, 0)
111
+ | dh_typemask(env, 1)
112
+ | dh_typemask(tl, 2) /* target_ulong addr */
113
+ | dh_typemask(i32, 3) /* uint32_t data */
114
+ | dh_typemask(i32, 4) /* unsigned oi */
115
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
116
+};
117
+
118
+static TCGHelperInfo info_helper_st64_mmu = {
119
+ .flags = TCG_CALL_NO_WG,
120
+ .typemask = dh_typemask(void, 0)
121
+ | dh_typemask(env, 1)
122
+ | dh_typemask(tl, 2) /* target_ulong addr */
123
+ | dh_typemask(i64, 3) /* uint64_t data */
124
+ | dh_typemask(i32, 4) /* unsigned oi */
125
+ | dh_typemask(ptr, 5) /* uintptr_t ra */
126
+};
127
+
128
#ifdef CONFIG_TCG_INTERPRETER
129
static ffi_type *typecode_to_ffi(int argmask)
130
{
131
@@ -XXX,XX +XXX,XX @@ static void tcg_context_init(unsigned max_cpus)
132
(gpointer)&all_helpers[i]);
133
}
134
135
+ init_call_layout(&info_helper_ld32_mmu);
136
+ init_call_layout(&info_helper_ld64_mmu);
137
+ init_call_layout(&info_helper_st32_mmu);
138
+ init_call_layout(&info_helper_st64_mmu);
139
+
140
#ifdef CONFIG_TCG_INTERPRETER
141
init_ffi_layouts();
142
#endif
143
@@ -XXX,XX +XXX,XX @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
144
}
145
}
146
147
+/*
148
+ * Similarly for qemu_ld/st slow path helpers.
149
+ * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
150
+ * using only the provided backend tcg_out_* functions.
151
+ */
152
+
153
+static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
154
+{
155
+ int ofs = arg_slot_stk_ofs(slot);
34
+
156
+
35
+ /*
157
+ /*
36
+ * 32-bit ops generate 32-bit results. For the result is zero test
158
+ * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not
37
+ * below, we can ignore high bits, but for further optimizations we
159
+ * require extension to uint64_t, adjust the address for uint32_t.
38
+ * need to record that the high bits contain garbage.
39
+ */
160
+ */
40
+ if (ctx->type == TCG_TYPE_I32) {
161
+ if (HOST_BIG_ENDIAN &&
41
+ ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
162
+ TCG_TARGET_REG_BITS == 64 &&
42
+ a_mask &= MAKE_64BIT_MASK(0, 32);
163
+ type == TCG_TYPE_I32) {
43
+ z_mask &= MAKE_64BIT_MASK(0, 32);
164
+ ofs += 4;
44
+ }
165
+ }
45
+
166
+ return ofs;
46
+ if (z_mask == 0) {
167
+}
47
+ return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
168
+
48
+ }
169
+static void tcg_out_helper_load_regs(TCGContext *s,
49
+ if (a_mask == 0) {
170
+ unsigned nmov, TCGMovExtend *mov,
50
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
171
+ unsigned ntmp, const int *tmp)
51
+ }
172
+{
52
+ return false;
173
+ switch (nmov) {
53
+}
174
+ default:
54
+
175
+ /* The backend must have provided enough temps for the worst case. */
55
/*
176
+ tcg_debug_assert(ntmp + 1 >= nmov);
56
* Convert @op to NOT, if NOT is supported by the host.
177
+
57
* Return true f the conversion is successful, which will still
178
+ for (unsigned i = nmov - 1; i >= 2; --i) {
58
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
179
+ TCGReg dst = mov[i].dst;
59
180
+
60
static bool fold_and(OptContext *ctx, TCGOp *op)
181
+ for (unsigned j = 0; j < i; ++j) {
61
{
182
+ if (dst == mov[j].src) {
62
+ uint64_t z1, z2;
183
+ /*
63
+
184
+ * Conflict.
64
if (fold_const2(ctx, op) ||
185
+ * Copy the source to a temporary, recurse for the
65
fold_xi_to_i(ctx, op, 0) ||
186
+ * remaining moves, perform the extension from our
66
fold_xi_to_x(ctx, op, -1) ||
187
+ * scratch on the way out.
67
fold_xx_to_x(ctx, op)) {
188
+ */
68
return true;
189
+ TCGReg scratch = tmp[--ntmp];
69
}
190
+ tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
70
- return false;
191
+ mov[i].src = scratch;
71
+
192
+
72
+ z1 = arg_info(op->args[1])->z_mask;
193
+ tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
73
+ z2 = arg_info(op->args[2])->z_mask;
194
+ tcg_out_movext1(s, &mov[i]);
74
+ ctx->z_mask = z1 & z2;
195
+ return;
196
+ }
197
+ }
198
+
199
+ /* No conflicts: perform this move and continue. */
200
+ tcg_out_movext1(s, &mov[i]);
201
+ }
202
+ /* fall through for the final two moves */
203
+
204
+ case 2:
205
+ tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
206
+ return;
207
+ case 1:
208
+ tcg_out_movext1(s, mov);
209
+ return;
210
+ case 0:
211
+ g_assert_not_reached();
212
+ }
213
+}
214
+
215
+static void tcg_out_helper_load_slots(TCGContext *s,
216
+ unsigned nmov, TCGMovExtend *mov,
217
+ const TCGLdstHelperParam *parm)
218
+{
219
+ unsigned i;
75
+
220
+
76
+ /*
221
+ /*
77
+ * Known-zeros does not imply known-ones. Therefore unless
222
+ * Start from the end, storing to the stack first.
78
+ * arg2 is constant, we can't infer affected bits from it.
223
+ * This frees those registers, so we need not consider overlap.
79
+ */
224
+ */
80
+ if (arg_is_const(op->args[2])) {
225
+ for (i = nmov; i-- > 0; ) {
81
+ ctx->a_mask = z1 & ~z2;
226
+ unsigned slot = mov[i].dst;
82
+ }
227
+
83
+
228
+ if (arg_slot_reg_p(slot)) {
84
+ return fold_masks(ctx, op);
229
+ goto found_reg;
85
}
230
+ }
86
231
+
87
static bool fold_andc(OptContext *ctx, TCGOp *op)
232
+ TCGReg src = mov[i].src;
88
{
233
+ TCGType dst_type = mov[i].dst_type;
89
+ uint64_t z1;
234
+ MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
90
+
235
+
91
if (fold_const2(ctx, op) ||
236
+ /* The argument is going onto the stack; extend into scratch. */
92
fold_xx_to_i(ctx, op, 0) ||
237
+ if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
93
fold_xi_to_x(ctx, op, 0) ||
238
+ tcg_debug_assert(parm->ntmp != 0);
94
fold_ix_to_not(ctx, op, -1)) {
239
+ mov[i].dst = src = parm->tmp[0];
95
return true;
240
+ tcg_out_movext1(s, &mov[i]);
96
}
241
+ }
97
- return false;
242
+
98
+
243
+ tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
99
+ z1 = arg_info(op->args[1])->z_mask;
244
+ tcg_out_helper_stk_ofs(dst_type, slot));
100
+
245
+ }
246
+ return;
247
+
248
+ found_reg:
101
+ /*
249
+ /*
102
+ * Known-zeros does not imply known-ones. Therefore unless
250
+ * The remaining arguments are in registers.
103
+ * arg2 is constant, we can't infer anything from it.
251
+ * Convert slot numbers to argument registers.
104
+ */
252
+ */
105
+ if (arg_is_const(op->args[2])) {
253
+ nmov = i + 1;
106
+ uint64_t z2 = ~arg_info(op->args[2])->z_mask;
254
+ for (i = 0; i < nmov; ++i) {
107
+ ctx->a_mask = z1 & ~z2;
255
+ mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
108
+ z1 &= z2;
256
+ }
109
+ }
257
+ tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
110
+ ctx->z_mask = z1;
258
+}
111
+
259
+
112
+ return fold_masks(ctx, op);
260
+static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
113
}
261
+ TCGType type, tcg_target_long imm,
114
262
+ const TCGLdstHelperParam *parm)
115
static bool fold_brcond(OptContext *ctx, TCGOp *op)
263
+{
116
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
264
+ if (arg_slot_reg_p(slot)) {
117
265
+ tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
118
static bool fold_bswap(OptContext *ctx, TCGOp *op)
266
+ } else {
119
{
267
+ int ofs = tcg_out_helper_stk_ofs(type, slot);
120
+ uint64_t z_mask, sign;
268
+ if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
121
+
269
+ tcg_debug_assert(parm->ntmp != 0);
122
if (arg_is_const(op->args[1])) {
270
+ tcg_out_movi(s, type, parm->tmp[0], imm);
123
uint64_t t = arg_info(op->args[1])->val;
271
+ tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
124
272
+ }
125
t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
273
+ }
126
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
274
+}
127
}
275
+
128
- return false;
276
+static void tcg_out_helper_load_common_args(TCGContext *s,
129
+
277
+ const TCGLabelQemuLdst *ldst,
130
+ z_mask = arg_info(op->args[1])->z_mask;
278
+ const TCGLdstHelperParam *parm,
131
+ switch (op->opc) {
279
+ const TCGHelperInfo *info,
132
+ case INDEX_op_bswap16_i32:
280
+ unsigned next_arg)
133
+ case INDEX_op_bswap16_i64:
281
+{
134
+ z_mask = bswap16(z_mask);
282
+ TCGMovExtend ptr_mov = {
135
+ sign = INT16_MIN;
283
+ .dst_type = TCG_TYPE_PTR,
284
+ .src_type = TCG_TYPE_PTR,
285
+ .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
286
+ };
287
+ const TCGCallArgumentLoc *loc = &info->in[0];
288
+ TCGType type;
289
+ unsigned slot;
290
+ tcg_target_ulong imm;
291
+
292
+ /*
293
+ * Handle env, which is always first.
294
+ */
295
+ ptr_mov.dst = loc->arg_slot;
296
+ ptr_mov.src = TCG_AREG0;
297
+ tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
298
+
299
+ /*
300
+ * Handle oi.
301
+ */
302
+ imm = ldst->oi;
303
+ loc = &info->in[next_arg];
304
+ type = TCG_TYPE_I32;
305
+ switch (loc->kind) {
306
+ case TCG_CALL_ARG_NORMAL:
136
+ break;
307
+ break;
137
+ case INDEX_op_bswap32_i32:
308
+ case TCG_CALL_ARG_EXTEND_U:
138
+ case INDEX_op_bswap32_i64:
309
+ case TCG_CALL_ARG_EXTEND_S:
139
+ z_mask = bswap32(z_mask);
310
+ /* No extension required for MemOpIdx. */
140
+ sign = INT32_MIN;
311
+ tcg_debug_assert(imm <= INT32_MAX);
141
+ break;
312
+ type = TCG_TYPE_REG;
142
+ case INDEX_op_bswap64_i64:
143
+ z_mask = bswap64(z_mask);
144
+ sign = INT64_MIN;
145
+ break;
313
+ break;
146
+ default:
314
+ default:
147
+ g_assert_not_reached();
315
+ g_assert_not_reached();
148
+ }
316
+ }
149
+
317
+ tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
150
+ switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
318
+ next_arg++;
151
+ case TCG_BSWAP_OZ:
319
+
320
+ /*
321
+ * Handle ra.
322
+ */
323
+ loc = &info->in[next_arg];
324
+ slot = loc->arg_slot;
325
+ if (parm->ra_gen) {
326
+ int arg_reg = -1;
327
+ TCGReg ra_reg;
328
+
329
+ if (arg_slot_reg_p(slot)) {
330
+ arg_reg = tcg_target_call_iarg_regs[slot];
331
+ }
332
+ ra_reg = parm->ra_gen(s, ldst, arg_reg);
333
+
334
+ ptr_mov.dst = slot;
335
+ ptr_mov.src = ra_reg;
336
+ tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
337
+ } else {
338
+ imm = (uintptr_t)ldst->raddr;
339
+ tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
340
+ }
341
+}
342
+
343
+static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
344
+ const TCGCallArgumentLoc *loc,
345
+ TCGType dst_type, TCGType src_type,
346
+ TCGReg lo, TCGReg hi)
347
+{
348
+ if (dst_type <= TCG_TYPE_REG) {
349
+ MemOp src_ext;
350
+
351
+ switch (loc->kind) {
352
+ case TCG_CALL_ARG_NORMAL:
353
+ src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
354
+ break;
355
+ case TCG_CALL_ARG_EXTEND_U:
356
+ dst_type = TCG_TYPE_REG;
357
+ src_ext = MO_UL;
358
+ break;
359
+ case TCG_CALL_ARG_EXTEND_S:
360
+ dst_type = TCG_TYPE_REG;
361
+ src_ext = MO_SL;
362
+ break;
363
+ default:
364
+ g_assert_not_reached();
365
+ }
366
+
367
+ mov[0].dst = loc->arg_slot;
368
+ mov[0].dst_type = dst_type;
369
+ mov[0].src = lo;
370
+ mov[0].src_type = src_type;
371
+ mov[0].src_ext = src_ext;
372
+ return 1;
373
+ }
374
+
375
+ assert(TCG_TARGET_REG_BITS == 32);
376
+
377
+ mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
378
+ mov[0].src = lo;
379
+ mov[0].dst_type = TCG_TYPE_I32;
380
+ mov[0].src_type = TCG_TYPE_I32;
381
+ mov[0].src_ext = MO_32;
382
+
383
+ mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
384
+ mov[1].src = hi;
385
+ mov[1].dst_type = TCG_TYPE_I32;
386
+ mov[1].src_type = TCG_TYPE_I32;
387
+ mov[1].src_ext = MO_32;
388
+
389
+ return 2;
390
+}
391
+
392
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
393
+ const TCGLdstHelperParam *parm)
394
+{
395
+ const TCGHelperInfo *info;
396
+ const TCGCallArgumentLoc *loc;
397
+ TCGMovExtend mov[2];
398
+ unsigned next_arg, nmov;
399
+ MemOp mop = get_memop(ldst->oi);
400
+
401
+ switch (mop & MO_SIZE) {
402
+ case MO_8:
403
+ case MO_16:
404
+ case MO_32:
405
+ info = &info_helper_ld32_mmu;
152
+ break;
406
+ break;
153
+ case TCG_BSWAP_OS:
407
+ case MO_64:
154
+ /* If the sign bit may be 1, force all the bits above to 1. */
408
+ info = &info_helper_ld64_mmu;
155
+ if (z_mask & sign) {
156
+ z_mask |= sign;
157
+ }
158
+ break;
159
+ default:
160
+ /* The high bits are undefined: force all bits above the sign to 1. */
161
+ z_mask |= sign << 1;
162
+ break;
163
+ }
164
+ ctx->z_mask = z_mask;
165
+
166
+ return fold_masks(ctx, op);
167
}
168
169
static bool fold_call(OptContext *ctx, TCGOp *op)
170
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
171
172
static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
173
{
174
+ uint64_t z_mask;
175
+
176
if (arg_is_const(op->args[1])) {
177
uint64_t t = arg_info(op->args[1])->val;
178
179
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
180
}
181
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
182
}
183
+
184
+ switch (ctx->type) {
185
+ case TCG_TYPE_I32:
186
+ z_mask = 31;
187
+ break;
188
+ case TCG_TYPE_I64:
189
+ z_mask = 63;
190
+ break;
409
+ break;
191
+ default:
410
+ default:
192
+ g_assert_not_reached();
411
+ g_assert_not_reached();
193
+ }
412
+ }
194
+ ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
413
+
195
+
414
+ /* Defer env argument. */
196
return false;
415
+ next_arg = 1;
197
}
416
+
198
417
+ loc = &info->in[next_arg];
199
static bool fold_ctpop(OptContext *ctx, TCGOp *op)
418
+ nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
200
{
419
+ ldst->addrlo_reg, ldst->addrhi_reg);
201
- return fold_const1(ctx, op);
420
+ next_arg += nmov;
202
+ if (fold_const1(ctx, op)) {
421
+
203
+ return true;
422
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
204
+ }
423
+
205
+
424
+ /* No special attention for 32 and 64-bit return values. */
206
+ switch (ctx->type) {
425
+ tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
207
+ case TCG_TYPE_I32:
426
+
208
+ ctx->z_mask = 32 | 31;
427
+ tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
428
+}
429
+
430
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
431
+ bool load_sign,
432
+ const TCGLdstHelperParam *parm)
433
+{
434
+ TCGMovExtend mov[2];
435
+
436
+ if (ldst->type <= TCG_TYPE_REG) {
437
+ MemOp mop = get_memop(ldst->oi);
438
+
439
+ mov[0].dst = ldst->datalo_reg;
440
+ mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
441
+ mov[0].dst_type = ldst->type;
442
+ mov[0].src_type = TCG_TYPE_REG;
443
+
444
+ /*
445
+ * If load_sign, then we allowed the helper to perform the
446
+ * appropriate sign extension to tcg_target_ulong, and all
447
+ * we need now is a plain move.
448
+ *
449
+ * If they do not, then we expect the relevant extension
450
+ * instruction to be no more expensive than a move, and
451
+ * we thus save the icache etc by only using one of two
452
+ * helper functions.
453
+ */
454
+ if (load_sign || !(mop & MO_SIGN)) {
455
+ if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
456
+ mov[0].src_ext = MO_32;
457
+ } else {
458
+ mov[0].src_ext = MO_64;
459
+ }
460
+ } else {
461
+ mov[0].src_ext = mop & MO_SSIZE;
462
+ }
463
+ tcg_out_movext1(s, mov);
464
+ } else {
465
+ assert(TCG_TARGET_REG_BITS == 32);
466
+
467
+ mov[0].dst = ldst->datalo_reg;
468
+ mov[0].src =
469
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
470
+ mov[0].dst_type = TCG_TYPE_I32;
471
+ mov[0].src_type = TCG_TYPE_I32;
472
+ mov[0].src_ext = MO_32;
473
+
474
+ mov[1].dst = ldst->datahi_reg;
475
+ mov[1].src =
476
+ tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
477
+ mov[1].dst_type = TCG_TYPE_REG;
478
+ mov[1].src_type = TCG_TYPE_REG;
479
+ mov[1].src_ext = MO_32;
480
+
481
+ tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
482
+ }
483
+}
484
+
485
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
486
+ const TCGLdstHelperParam *parm)
487
+{
488
+ const TCGHelperInfo *info;
489
+ const TCGCallArgumentLoc *loc;
490
+ TCGMovExtend mov[4];
491
+ TCGType data_type;
492
+ unsigned next_arg, nmov, n;
493
+ MemOp mop = get_memop(ldst->oi);
494
+
495
+ switch (mop & MO_SIZE) {
496
+ case MO_8:
497
+ case MO_16:
498
+ case MO_32:
499
+ info = &info_helper_st32_mmu;
500
+ data_type = TCG_TYPE_I32;
209
+ break;
501
+ break;
210
+ case TCG_TYPE_I64:
502
+ case MO_64:
211
+ ctx->z_mask = 64 | 63;
503
+ info = &info_helper_st64_mmu;
504
+ data_type = TCG_TYPE_I64;
212
+ break;
505
+ break;
213
+ default:
506
+ default:
214
+ g_assert_not_reached();
507
+ g_assert_not_reached();
215
+ }
508
+ }
216
+ return false;
509
+
217
}
510
+ /* Defer env argument. */
218
511
+ next_arg = 1;
219
static bool fold_deposit(OptContext *ctx, TCGOp *op)
512
+ nmov = 0;
220
@@ -XXX,XX +XXX,XX @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
513
+
221
t1 = deposit64(t1, op->args[3], op->args[4], t2);
514
+ /* Handle addr argument. */
222
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
515
+ loc = &info->in[next_arg];
223
}
516
+ n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
224
+
517
+ ldst->addrlo_reg, ldst->addrhi_reg);
225
+ ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
518
+ next_arg += n;
226
+ op->args[3], op->args[4],
519
+ nmov += n;
227
+ arg_info(op->args[2])->z_mask);
520
+
228
return false;
521
+ /* Handle data argument. */
229
}
522
+ loc = &info->in[next_arg];
230
523
+ n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
231
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
524
+ ldst->datalo_reg, ldst->datahi_reg);
232
525
+ next_arg += n;
233
static bool fold_extract(OptContext *ctx, TCGOp *op)
526
+ nmov += n;
234
{
527
+ tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
235
+ uint64_t z_mask_old, z_mask;
528
+
236
+
529
+ tcg_out_helper_load_slots(s, nmov, mov, parm);
237
if (arg_is_const(op->args[1])) {
530
+ tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
238
uint64_t t;
531
+}
239
532
+
240
@@ -XXX,XX +XXX,XX @@ static bool fold_extract(OptContext *ctx, TCGOp *op)
533
#ifdef CONFIG_PROFILER
241
t = extract64(t, op->args[2], op->args[3]);
534
242
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
535
/* avoid copy/paste errors */
243
}
244
- return false;
245
+
246
+ z_mask_old = arg_info(op->args[1])->z_mask;
247
+ z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
248
+ if (op->args[2] == 0) {
249
+ ctx->a_mask = z_mask_old ^ z_mask;
250
+ }
251
+ ctx->z_mask = z_mask;
252
+
253
+ return fold_masks(ctx, op);
254
}
255
256
static bool fold_extract2(OptContext *ctx, TCGOp *op)
257
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
258
259
static bool fold_exts(OptContext *ctx, TCGOp *op)
260
{
261
- return fold_const1(ctx, op);
262
+ uint64_t z_mask_old, z_mask, sign;
263
+ bool type_change = false;
264
+
265
+ if (fold_const1(ctx, op)) {
266
+ return true;
267
+ }
268
+
269
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
270
+
271
+ switch (op->opc) {
272
+ CASE_OP_32_64(ext8s):
273
+ sign = INT8_MIN;
274
+ z_mask = (uint8_t)z_mask;
275
+ break;
276
+ CASE_OP_32_64(ext16s):
277
+ sign = INT16_MIN;
278
+ z_mask = (uint16_t)z_mask;
279
+ break;
280
+ case INDEX_op_ext_i32_i64:
281
+ type_change = true;
282
+ QEMU_FALLTHROUGH;
283
+ case INDEX_op_ext32s_i64:
284
+ sign = INT32_MIN;
285
+ z_mask = (uint32_t)z_mask;
286
+ break;
287
+ default:
288
+ g_assert_not_reached();
289
+ }
290
+
291
+ if (z_mask & sign) {
292
+ z_mask |= sign;
293
+ } else if (!type_change) {
294
+ ctx->a_mask = z_mask_old ^ z_mask;
295
+ }
296
+ ctx->z_mask = z_mask;
297
+
298
+ return fold_masks(ctx, op);
299
}
300
301
static bool fold_extu(OptContext *ctx, TCGOp *op)
302
{
303
- return fold_const1(ctx, op);
304
+ uint64_t z_mask_old, z_mask;
305
+ bool type_change = false;
306
+
307
+ if (fold_const1(ctx, op)) {
308
+ return true;
309
+ }
310
+
311
+ z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
312
+
313
+ switch (op->opc) {
314
+ CASE_OP_32_64(ext8u):
315
+ z_mask = (uint8_t)z_mask;
316
+ break;
317
+ CASE_OP_32_64(ext16u):
318
+ z_mask = (uint16_t)z_mask;
319
+ break;
320
+ case INDEX_op_extrl_i64_i32:
321
+ case INDEX_op_extu_i32_i64:
322
+ type_change = true;
323
+ QEMU_FALLTHROUGH;
324
+ case INDEX_op_ext32u_i64:
325
+ z_mask = (uint32_t)z_mask;
326
+ break;
327
+ case INDEX_op_extrh_i64_i32:
328
+ type_change = true;
329
+ z_mask >>= 32;
330
+ break;
331
+ default:
332
+ g_assert_not_reached();
333
+ }
334
+
335
+ ctx->z_mask = z_mask;
336
+ if (!type_change) {
337
+ ctx->a_mask = z_mask_old ^ z_mask;
338
+ }
339
+ return fold_masks(ctx, op);
340
}
341
342
static bool fold_mb(OptContext *ctx, TCGOp *op)
343
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
344
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
345
}
346
347
+ ctx->z_mask = arg_info(op->args[3])->z_mask
348
+ | arg_info(op->args[4])->z_mask;
349
+
350
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
351
uint64_t tv = arg_info(op->args[3])->val;
352
uint64_t fv = arg_info(op->args[4])->val;
353
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
354
355
static bool fold_neg(OptContext *ctx, TCGOp *op)
356
{
357
+ uint64_t z_mask;
358
+
359
if (fold_const1(ctx, op)) {
360
return true;
361
}
362
+
363
+ /* Set to 1 all bits to the left of the rightmost. */
364
+ z_mask = arg_info(op->args[1])->z_mask;
365
+ ctx->z_mask = -(z_mask & -z_mask);
366
+
367
/*
368
* Because of fold_sub_to_neg, we want to always return true,
369
* via finish_folding.
370
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
371
fold_xx_to_x(ctx, op)) {
372
return true;
373
}
374
- return false;
375
+
376
+ ctx->z_mask = arg_info(op->args[1])->z_mask
377
+ | arg_info(op->args[2])->z_mask;
378
+ return fold_masks(ctx, op);
379
}
380
381
static bool fold_orc(OptContext *ctx, TCGOp *op)
382
@@ -XXX,XX +XXX,XX @@ static bool fold_orc(OptContext *ctx, TCGOp *op)
383
384
static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
385
{
386
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
387
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
388
+ MemOp mop = get_memop(oi);
389
+ int width = 8 * memop_size(mop);
390
+
391
+ if (!(mop & MO_SIGN) && width < 64) {
392
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
393
+ }
394
+
395
/* Opcodes that touch guest memory stop the mb optimization. */
396
ctx->prev_mb = NULL;
397
return false;
398
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
399
if (i >= 0) {
400
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
401
}
402
+
403
+ ctx->z_mask = 1;
404
return false;
405
}
406
407
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
408
op->opc = INDEX_op_setcond_i32;
409
break;
410
}
411
+
412
+ ctx->z_mask = 1;
413
return false;
414
415
do_setcond_const:
416
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
417
418
static bool fold_sextract(OptContext *ctx, TCGOp *op)
419
{
420
+ int64_t z_mask_old, z_mask;
421
+
422
if (arg_is_const(op->args[1])) {
423
uint64_t t;
424
425
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
426
t = sextract64(t, op->args[2], op->args[3]);
427
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
428
}
429
- return false;
430
+
431
+ z_mask_old = arg_info(op->args[1])->z_mask;
432
+ z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
433
+ if (op->args[2] == 0 && z_mask >= 0) {
434
+ ctx->a_mask = z_mask_old ^ z_mask;
435
+ }
436
+ ctx->z_mask = z_mask;
437
+
438
+ return fold_masks(ctx, op);
439
}
440
441
static bool fold_shift(OptContext *ctx, TCGOp *op)
442
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
443
fold_xi_to_x(ctx, op, 0)) {
444
return true;
445
}
446
+
447
+ if (arg_is_const(op->args[2])) {
448
+ ctx->z_mask = do_constant_folding(op->opc, ctx->type,
449
+ arg_info(op->args[1])->z_mask,
450
+ arg_info(op->args[2])->val);
451
+ return fold_masks(ctx, op);
452
+ }
453
return false;
454
}
455
456
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
457
return fold_addsub2_i32(ctx, op, false);
458
}
459
460
+static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
461
+{
462
+ /* We can't do any folding with a load, but we can record bits. */
463
+ switch (op->opc) {
464
+ CASE_OP_32_64(ld8u):
465
+ ctx->z_mask = MAKE_64BIT_MASK(0, 8);
466
+ break;
467
+ CASE_OP_32_64(ld16u):
468
+ ctx->z_mask = MAKE_64BIT_MASK(0, 16);
469
+ break;
470
+ case INDEX_op_ld32u_i64:
471
+ ctx->z_mask = MAKE_64BIT_MASK(0, 32);
472
+ break;
473
+ default:
474
+ g_assert_not_reached();
475
+ }
476
+ return false;
477
+}
478
+
479
static bool fold_xor(OptContext *ctx, TCGOp *op)
480
{
481
if (fold_const2(ctx, op) ||
482
@@ -XXX,XX +XXX,XX @@ static bool fold_xor(OptContext *ctx, TCGOp *op)
483
fold_xi_to_not(ctx, op, -1)) {
484
return true;
485
}
486
- return false;
487
+
488
+ ctx->z_mask = arg_info(op->args[1])->z_mask
489
+ | arg_info(op->args[2])->z_mask;
490
+ return fold_masks(ctx, op);
491
}
492
493
/* Propagate constants and copies, fold constant expressions. */
494
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
495
}
496
497
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
498
- uint64_t z_mask, partmask, affected, tmp;
499
TCGOpcode opc = op->opc;
500
const TCGOpDef *def;
501
bool done = false;
502
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
503
break;
504
}
505
506
- /* Simplify using known-zero bits. Currently only ops with a single
507
- output argument is supported. */
508
- z_mask = -1;
509
- affected = -1;
510
- switch (opc) {
511
- CASE_OP_32_64(ext8s):
512
- if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
513
- break;
514
- }
515
- QEMU_FALLTHROUGH;
516
- CASE_OP_32_64(ext8u):
517
- z_mask = 0xff;
518
- goto and_const;
519
- CASE_OP_32_64(ext16s):
520
- if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
521
- break;
522
- }
523
- QEMU_FALLTHROUGH;
524
- CASE_OP_32_64(ext16u):
525
- z_mask = 0xffff;
526
- goto and_const;
527
- case INDEX_op_ext32s_i64:
528
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
529
- break;
530
- }
531
- QEMU_FALLTHROUGH;
532
- case INDEX_op_ext32u_i64:
533
- z_mask = 0xffffffffU;
534
- goto and_const;
535
-
536
- CASE_OP_32_64(and):
537
- z_mask = arg_info(op->args[2])->z_mask;
538
- if (arg_is_const(op->args[2])) {
539
- and_const:
540
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
541
- }
542
- z_mask = arg_info(op->args[1])->z_mask & z_mask;
543
- break;
544
-
545
- case INDEX_op_ext_i32_i64:
546
- if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
547
- break;
548
- }
549
- QEMU_FALLTHROUGH;
550
- case INDEX_op_extu_i32_i64:
551
- /* We do not compute affected as it is a size changing op. */
552
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
553
- break;
554
-
555
- CASE_OP_32_64(andc):
556
- /* Known-zeros does not imply known-ones. Therefore unless
557
- op->args[2] is constant, we can't infer anything from it. */
558
- if (arg_is_const(op->args[2])) {
559
- z_mask = ~arg_info(op->args[2])->z_mask;
560
- goto and_const;
561
- }
562
- /* But we certainly know nothing outside args[1] may be set. */
563
- z_mask = arg_info(op->args[1])->z_mask;
564
- break;
565
-
566
- case INDEX_op_sar_i32:
567
- if (arg_is_const(op->args[2])) {
568
- tmp = arg_info(op->args[2])->val & 31;
569
- z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
570
- }
571
- break;
572
- case INDEX_op_sar_i64:
573
- if (arg_is_const(op->args[2])) {
574
- tmp = arg_info(op->args[2])->val & 63;
575
- z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
576
- }
577
- break;
578
-
579
- case INDEX_op_shr_i32:
580
- if (arg_is_const(op->args[2])) {
581
- tmp = arg_info(op->args[2])->val & 31;
582
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
583
- }
584
- break;
585
- case INDEX_op_shr_i64:
586
- if (arg_is_const(op->args[2])) {
587
- tmp = arg_info(op->args[2])->val & 63;
588
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
589
- }
590
- break;
591
-
592
- case INDEX_op_extrl_i64_i32:
593
- z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
594
- break;
595
- case INDEX_op_extrh_i64_i32:
596
- z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
597
- break;
598
-
599
- CASE_OP_32_64(shl):
600
- if (arg_is_const(op->args[2])) {
601
- tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
602
- z_mask = arg_info(op->args[1])->z_mask << tmp;
603
- }
604
- break;
605
-
606
- CASE_OP_32_64(neg):
607
- /* Set to 1 all bits to the left of the rightmost. */
608
- z_mask = -(arg_info(op->args[1])->z_mask
609
- & -arg_info(op->args[1])->z_mask);
610
- break;
611
-
612
- CASE_OP_32_64(deposit):
613
- z_mask = deposit64(arg_info(op->args[1])->z_mask,
614
- op->args[3], op->args[4],
615
- arg_info(op->args[2])->z_mask);
616
- break;
617
-
618
- CASE_OP_32_64(extract):
619
- z_mask = extract64(arg_info(op->args[1])->z_mask,
620
- op->args[2], op->args[3]);
621
- if (op->args[2] == 0) {
622
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
623
- }
624
- break;
625
- CASE_OP_32_64(sextract):
626
- z_mask = sextract64(arg_info(op->args[1])->z_mask,
627
- op->args[2], op->args[3]);
628
- if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
629
- affected = arg_info(op->args[1])->z_mask & ~z_mask;
630
- }
631
- break;
632
-
633
- CASE_OP_32_64(or):
634
- CASE_OP_32_64(xor):
635
- z_mask = arg_info(op->args[1])->z_mask
636
- | arg_info(op->args[2])->z_mask;
637
- break;
638
-
639
- case INDEX_op_clz_i32:
640
- case INDEX_op_ctz_i32:
641
- z_mask = arg_info(op->args[2])->z_mask | 31;
642
- break;
643
-
644
- case INDEX_op_clz_i64:
645
- case INDEX_op_ctz_i64:
646
- z_mask = arg_info(op->args[2])->z_mask | 63;
647
- break;
648
-
649
- case INDEX_op_ctpop_i32:
650
- z_mask = 32 | 31;
651
- break;
652
- case INDEX_op_ctpop_i64:
653
- z_mask = 64 | 63;
654
- break;
655
-
656
- CASE_OP_32_64(setcond):
657
- case INDEX_op_setcond2_i32:
658
- z_mask = 1;
659
- break;
660
-
661
- CASE_OP_32_64(movcond):
662
- z_mask = arg_info(op->args[3])->z_mask
663
- | arg_info(op->args[4])->z_mask;
664
- break;
665
-
666
- CASE_OP_32_64(ld8u):
667
- z_mask = 0xff;
668
- break;
669
- CASE_OP_32_64(ld16u):
670
- z_mask = 0xffff;
671
- break;
672
- case INDEX_op_ld32u_i64:
673
- z_mask = 0xffffffffu;
674
- break;
675
-
676
- CASE_OP_32_64(qemu_ld):
677
- {
678
- MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
679
- MemOp mop = get_memop(oi);
680
- if (!(mop & MO_SIGN)) {
681
- z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
682
- }
683
- }
684
- break;
685
-
686
- CASE_OP_32_64(bswap16):
687
- z_mask = arg_info(op->args[1])->z_mask;
688
- if (z_mask <= 0xffff) {
689
- op->args[2] |= TCG_BSWAP_IZ;
690
- }
691
- z_mask = bswap16(z_mask);
692
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
693
- case TCG_BSWAP_OZ:
694
- break;
695
- case TCG_BSWAP_OS:
696
- z_mask = (int16_t)z_mask;
697
- break;
698
- default: /* undefined high bits */
699
- z_mask |= MAKE_64BIT_MASK(16, 48);
700
- break;
701
- }
702
- break;
703
-
704
- case INDEX_op_bswap32_i64:
705
- z_mask = arg_info(op->args[1])->z_mask;
706
- if (z_mask <= 0xffffffffu) {
707
- op->args[2] |= TCG_BSWAP_IZ;
708
- }
709
- z_mask = bswap32(z_mask);
710
- switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
711
- case TCG_BSWAP_OZ:
712
- break;
713
- case TCG_BSWAP_OS:
714
- z_mask = (int32_t)z_mask;
715
- break;
716
- default: /* undefined high bits */
717
- z_mask |= MAKE_64BIT_MASK(32, 32);
718
- break;
719
- }
720
- break;
721
-
722
- default:
723
- break;
724
- }
725
-
726
- /* 32-bit ops generate 32-bit results. For the result is zero test
727
- below, we can ignore high bits, but for further optimizations we
728
- need to record that the high bits contain garbage. */
729
- partmask = z_mask;
730
- if (ctx.type == TCG_TYPE_I32) {
731
- z_mask |= ~(tcg_target_ulong)0xffffffffu;
732
- partmask &= 0xffffffffu;
733
- affected &= 0xffffffffu;
734
- }
735
- ctx.z_mask = z_mask;
736
-
737
- if (partmask == 0) {
738
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
739
- continue;
740
- }
741
- if (affected == 0) {
742
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
743
- continue;
744
- }
745
+ /* Assume all bits affected, and no bits known zero. */
746
+ ctx.a_mask = -1;
747
+ ctx.z_mask = -1;
748
749
/*
750
* Process each opcode.
751
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
752
case INDEX_op_extrh_i64_i32:
753
done = fold_extu(&ctx, op);
754
break;
755
+ CASE_OP_32_64(ld8u):
756
+ CASE_OP_32_64(ld16u):
757
+ case INDEX_op_ld32u_i64:
758
+ done = fold_tcg_ld(&ctx, op);
759
+ break;
760
case INDEX_op_mb:
761
done = fold_mb(&ctx, op);
762
break;
763
--
536
--
764
2.25.1
537
2.34.1
765
538
766
539
diff view generated by jsdifflib
1
Pull the "op r, a, a => movi r, 0" optimization into a function,
1
Use tcg_out_ld_helper_args and tcg_out_ld_helper_ret.
2
and use it in the outer opcode fold functions.
3
2
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
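The identity behind the "op r, a, a => movi r, 0" folding described above is purely algebraic: sub, xor and andc of a value with itself are always zero. A tiny standalone check, in plain C rather than QEMU code:

    /* cc -o fold fold.c && ./fold */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t a = 0xdeadbeefcafef00dULL;

        assert((a - a) == 0);     /* sub  r, a, a -> movi r, 0 */
        assert((a ^ a) == 0);     /* xor  r, a, a -> movi r, 0 */
        assert((a & ~a) == 0);    /* andc r, a, a -> movi r, 0 */
        return 0;
    }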
7
---
5
---
8
tcg/optimize.c | 41 ++++++++++++++++++++++++-----------------
6
tcg/i386/tcg-target.c.inc | 71 +++++++++++++++------------------------
9
1 file changed, 24 insertions(+), 17 deletions(-)
7
1 file changed, 28 insertions(+), 43 deletions(-)
10
8
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
11
--- a/tcg/i386/tcg-target.c.inc
14
+++ b/tcg/optimize.c
12
+++ b/tcg/i386/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
16
return false;
14
[MO_BEUQ] = helper_be_stq_mmu,
17
}
15
};
18
16
19
+/* If the binary operation has both arguments equal, fold to @i. */
17
+/*
20
+static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
18
+ * Because i686 has no register parameters and because x86_64 has xchg
19
+ * to handle addr/data register overlap, we have placed all input arguments
20
+ * before we need might need a scratch reg.
21
+ *
22
+ * Even then, a scratch is only needed for l->raddr. Rather than expose
23
+ * a general-purpose scratch when we don't actually know it's available,
24
+ * use the ra_gen hook to load into RAX if needed.
25
+ */
26
+#if TCG_TARGET_REG_BITS == 64
27
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
21
+{
28
+{
22
+ if (args_are_copies(op->args[1], op->args[2])) {
29
+ if (arg < 0) {
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
30
+ arg = TCG_REG_RAX;
24
+ }
31
+ }
25
+ return false;
32
+ tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr);
33
+ return arg;
26
+}
34
+}
35
+static const TCGLdstHelperParam ldst_helper_param = {
36
+ .ra_gen = ldst_ra_gen
37
+};
38
+#else
39
+static const TCGLdstHelperParam ldst_helper_param = { };
40
+#endif
27
+
41
+
28
/*
42
/*
29
* These outermost fold_<op> functions are sorted alphabetically.
43
* Generate code for the slow path for a load at the end of block
30
*/
44
*/
31
@@ -XXX,XX +XXX,XX @@ static bool fold_and(OptContext *ctx, TCGOp *op)
45
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
32
33
static bool fold_andc(OptContext *ctx, TCGOp *op)
34
{
46
{
35
- return fold_const2(ctx, op);
47
- MemOpIdx oi = l->oi;
36
+ if (fold_const2(ctx, op) ||
48
- MemOp opc = get_memop(oi);
37
+ fold_xx_to_i(ctx, op, 0)) {
49
+ MemOp opc = get_memop(l->oi);
38
+ return true;
50
tcg_insn_unit **label_ptr = &l->label_ptr[0];
39
+ }
51
40
+ return false;
52
/* resolve label address */
41
}
53
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
42
54
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
43
static bool fold_brcond(OptContext *ctx, TCGOp *op)
55
}
44
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
56
45
57
- if (TCG_TARGET_REG_BITS == 32) {
46
static bool fold_sub(OptContext *ctx, TCGOp *op)
58
- int ofs = 0;
47
{
59
-
48
- return fold_const2(ctx, op);
60
- tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
49
+ if (fold_const2(ctx, op) ||
61
- ofs += 4;
50
+ fold_xx_to_i(ctx, op, 0)) {
62
-
51
+ return true;
63
- tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
52
+ }
64
- ofs += 4;
53
+ return false;
65
-
54
}
66
- if (TARGET_LONG_BITS == 64) {
55
67
- tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
56
static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
68
- ofs += 4;
57
@@ -XXX,XX +XXX,XX @@ static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
58
59
static bool fold_xor(OptContext *ctx, TCGOp *op)
60
{
61
- return fold_const2(ctx, op);
62
+ if (fold_const2(ctx, op) ||
63
+ fold_xx_to_i(ctx, op, 0)) {
64
+ return true;
65
+ }
66
+ return false;
67
}
68
69
/* Propagate constants and copies, fold constant expressions. */
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
71
break;
72
}
73
74
- /* Simplify expression for "op r, a, a => movi r, 0" cases */
75
- switch (opc) {
76
- CASE_OP_32_64_VEC(andc):
77
- CASE_OP_32_64_VEC(sub):
78
- CASE_OP_32_64_VEC(xor):
79
- if (args_are_copies(op->args[1], op->args[2])) {
80
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
81
- continue;
82
- }
83
- break;
84
- default:
85
- break;
86
- }
69
- }
87
-
70
-
88
/*
71
- tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
89
* Process each opcode.
72
- ofs += 4;
90
* Sorted alphabetically by opcode as much as possible.
73
-
74
- tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
75
- } else {
76
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
77
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
78
- l->addrlo_reg);
79
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
80
- tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
81
- (uintptr_t)l->raddr);
82
- }
83
-
84
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
85
tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
86
+ tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
87
88
- if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
89
- TCGMovExtend ext[2] = {
90
- { .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32,
91
- .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
92
- { .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32,
93
- .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
94
- };
95
- tcg_out_movext2(s, &ext[0], &ext[1], -1);
96
- } else {
97
- tcg_out_movext(s, l->type, l->datalo_reg,
98
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
99
- }
100
-
101
- /* Jump to the code corresponding to next IR of qemu_st */
102
tcg_out_jmp(s, l->raddr);
103
return true;
104
}
--
2.25.1

--
2.34.1
1
Compute the type of the operation early.
1
Use tcg_out_st_helper_args. This eliminates the use of a tail call to
2
the store helper. This may or may not be an improvement, depending on
3
the call/return branch prediction of the host microarchitecture.
2
4
3
There are at least 4 places that used a def->flags ladder
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
to determine the type of the operation being optimized.
5
6
There were two places that assumed !TCG_OPF_64BIT means
7
TCG_TYPE_I32, and so could potentially compute incorrect
8
results for vector operations.
9
10
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
12
---
7
---
13
tcg/optimize.c | 149 +++++++++++++++++++++++++++++--------------------
8
tcg/i386/tcg-target.c.inc | 57 +++------------------------------------
14
1 file changed, 89 insertions(+), 60 deletions(-)
9
1 file changed, 4 insertions(+), 53 deletions(-)
15
10
16
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
17
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
18
--- a/tcg/optimize.c
13
--- a/tcg/i386/tcg-target.c.inc
19
+++ b/tcg/optimize.c
14
+++ b/tcg/i386/tcg-target.c.inc
20
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
15
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
21
16
*/
22
/* In flight values from optimization. */
17
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
23
uint64_t z_mask;
24
+ TCGType type;
25
} OptContext;
26
27
static inline TempOptInfo *ts_info(TCGTemp *ts)
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
29
{
18
{
30
TCGTemp *dst_ts = arg_temp(dst);
19
- MemOpIdx oi = l->oi;
31
TCGTemp *src_ts = arg_temp(src);
20
- MemOp opc = get_memop(oi);
32
- const TCGOpDef *def;
21
- MemOp s_bits = opc & MO_SIZE;
33
TempOptInfo *di;
22
+ MemOp opc = get_memop(l->oi);
34
TempOptInfo *si;
23
tcg_insn_unit **label_ptr = &l->label_ptr[0];
35
uint64_t z_mask;
24
- TCGReg retaddr;
36
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
25
37
reset_ts(dst_ts);
26
/* resolve label address */
38
di = ts_info(dst_ts);
27
tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
39
si = ts_info(src_ts);
28
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
40
- def = &tcg_op_defs[op->opc];
29
tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
41
- if (def->flags & TCG_OPF_VECTOR) {
30
}
42
- new_op = INDEX_op_mov_vec;
31
43
- } else if (def->flags & TCG_OPF_64BIT) {
32
- if (TCG_TARGET_REG_BITS == 32) {
44
- new_op = INDEX_op_mov_i64;
33
- int ofs = 0;
34
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
35
+ tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
36
37
- tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
38
- ofs += 4;
39
-
40
- tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
41
- ofs += 4;
42
-
43
- if (TARGET_LONG_BITS == 64) {
44
- tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
45
- ofs += 4;
46
- }
47
-
48
- tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
49
- ofs += 4;
50
-
51
- if (s_bits == MO_64) {
52
- tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
53
- ofs += 4;
54
- }
55
-
56
- tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
57
- ofs += 4;
58
-
59
- retaddr = TCG_REG_EAX;
60
- tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
61
- tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
45
- } else {
62
- } else {
46
+
63
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
47
+ switch (ctx->type) {
64
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
48
+ case TCG_TYPE_I32:
65
- l->addrlo_reg);
49
new_op = INDEX_op_mov_i32;
66
- tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
50
+ break;
67
- tcg_target_call_iarg_regs[2], l->datalo_reg);
51
+ case TCG_TYPE_I64:
68
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
52
+ new_op = INDEX_op_mov_i64;
53
+ break;
54
+ case TCG_TYPE_V64:
55
+ case TCG_TYPE_V128:
56
+ case TCG_TYPE_V256:
57
+ /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
58
+ new_op = INDEX_op_mov_vec;
59
+ break;
60
+ default:
61
+ g_assert_not_reached();
62
}
63
op->opc = new_op;
64
- /* TCGOP_VECL and TCGOP_VECE remain unchanged. */
65
op->args[0] = dst;
66
op->args[1] = src;
67
68
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
69
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
70
TCGArg dst, uint64_t val)
71
{
72
- const TCGOpDef *def = &tcg_op_defs[op->opc];
73
- TCGType type;
74
- TCGTemp *tv;
75
-
69
-
76
- if (def->flags & TCG_OPF_VECTOR) {
70
- if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
77
- type = TCGOP_VECL(op) + TCG_TYPE_V64;
71
- retaddr = tcg_target_call_iarg_regs[4];
78
- } else if (def->flags & TCG_OPF_64BIT) {
72
- tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
79
- type = TCG_TYPE_I64;
73
- } else {
80
- } else {
74
- retaddr = TCG_REG_RAX;
81
- type = TCG_TYPE_I32;
75
- tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
76
- tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
77
- TCG_TARGET_CALL_STACK_OFFSET);
78
- }
82
- }
79
- }
83
-
80
-
84
/* Convert movi to mov with constant temp. */
81
- /* "Tail call" to the helper, with the return address back inline. */
85
- tv = tcg_constant_internal(type, val);
82
- tcg_out_push(s, retaddr);
86
+ TCGTemp *tv = tcg_constant_internal(ctx->type, val);
83
- tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
87
+
84
+ tcg_out_jmp(s, l->raddr);
88
init_ts_info(ctx, tv);
85
return true;
89
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
90
}
86
}
91
@@ -XXX,XX +XXX,XX @@ static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
87
#else
92
}
93
}
94
95
-static uint64_t do_constant_folding(TCGOpcode op, uint64_t x, uint64_t y)
96
+static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
97
+ uint64_t x, uint64_t y)
98
{
99
- const TCGOpDef *def = &tcg_op_defs[op];
100
uint64_t res = do_constant_folding_2(op, x, y);
101
- if (!(def->flags & TCG_OPF_64BIT)) {
102
+ if (type == TCG_TYPE_I32) {
103
res = (int32_t)res;
104
}
105
return res;
106
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
107
* Return -1 if the condition can't be simplified,
108
* and the result of the condition (0 or 1) if it can.
109
*/
110
-static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
111
+static int do_constant_folding_cond(TCGType type, TCGArg x,
112
TCGArg y, TCGCond c)
113
{
114
uint64_t xv = arg_info(x)->val;
115
uint64_t yv = arg_info(y)->val;
116
117
if (arg_is_const(x) && arg_is_const(y)) {
118
- const TCGOpDef *def = &tcg_op_defs[op];
119
- tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
120
- if (def->flags & TCG_OPF_64BIT) {
121
- return do_constant_folding_cond_64(xv, yv, c);
122
- } else {
123
+ switch (type) {
124
+ case TCG_TYPE_I32:
125
return do_constant_folding_cond_32(xv, yv, c);
126
+ case TCG_TYPE_I64:
127
+ return do_constant_folding_cond_64(xv, yv, c);
128
+ default:
129
+ /* Only scalar comparisons are optimizable */
130
+ return -1;
131
}
132
} else if (args_are_copies(x, y)) {
133
return do_constant_folding_cond_eq(c);
134
@@ -XXX,XX +XXX,XX @@ static bool fold_const1(OptContext *ctx, TCGOp *op)
135
uint64_t t;
136
137
t = arg_info(op->args[1])->val;
138
- t = do_constant_folding(op->opc, t, 0);
139
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
140
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
141
}
142
return false;
143
@@ -XXX,XX +XXX,XX @@ static bool fold_const2(OptContext *ctx, TCGOp *op)
144
uint64_t t1 = arg_info(op->args[1])->val;
145
uint64_t t2 = arg_info(op->args[2])->val;
146
147
- t1 = do_constant_folding(op->opc, t1, t2);
148
+ t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
149
return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
150
}
151
return false;
152
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
153
static bool fold_brcond(OptContext *ctx, TCGOp *op)
154
{
155
TCGCond cond = op->args[2];
156
- int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
157
+ int i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
158
159
if (i == 0) {
160
tcg_op_remove(ctx->tcg, op);
161
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
162
* Simplify EQ/NE comparisons where one of the pairs
163
* can be simplified.
164
*/
165
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[0],
166
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
167
op->args[2], cond);
168
switch (i ^ inv) {
169
case 0:
170
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
171
goto do_brcond_high;
172
}
173
174
- i = do_constant_folding_cond(INDEX_op_brcond_i32, op->args[1],
175
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
176
op->args[3], cond);
177
switch (i ^ inv) {
178
case 0:
179
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
180
if (arg_is_const(op->args[1])) {
181
uint64_t t = arg_info(op->args[1])->val;
182
183
- t = do_constant_folding(op->opc, t, op->args[2]);
184
+ t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
185
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
186
}
187
return false;
188
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
189
uint64_t t = arg_info(op->args[1])->val;
190
191
if (t != 0) {
192
- t = do_constant_folding(op->opc, t, 0);
193
+ t = do_constant_folding(op->opc, ctx->type, t, 0);
194
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
195
}
196
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
197
@@ -XXX,XX +XXX,XX @@ static bool fold_mov(OptContext *ctx, TCGOp *op)
198
199
static bool fold_movcond(OptContext *ctx, TCGOp *op)
200
{
201
- TCGOpcode opc = op->opc;
202
TCGCond cond = op->args[5];
203
- int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
204
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
205
206
if (i >= 0) {
207
return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
208
@@ -XXX,XX +XXX,XX @@ static bool fold_movcond(OptContext *ctx, TCGOp *op)
209
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
210
uint64_t tv = arg_info(op->args[3])->val;
211
uint64_t fv = arg_info(op->args[4])->val;
212
+ TCGOpcode opc;
213
214
- opc = (opc == INDEX_op_movcond_i32
215
- ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
216
+ switch (ctx->type) {
217
+ case TCG_TYPE_I32:
218
+ opc = INDEX_op_setcond_i32;
219
+ break;
220
+ case TCG_TYPE_I64:
221
+ opc = INDEX_op_setcond_i64;
222
+ break;
223
+ default:
224
+ g_assert_not_reached();
225
+ }
226
227
if (tv == 1 && fv == 0) {
228
op->opc = opc;
229
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
230
static bool fold_setcond(OptContext *ctx, TCGOp *op)
231
{
232
TCGCond cond = op->args[3];
233
- int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
234
+ int i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
235
236
if (i >= 0) {
237
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
238
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
239
* Simplify EQ/NE comparisons where one of the pairs
240
* can be simplified.
241
*/
242
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
243
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
244
op->args[3], cond);
245
switch (i ^ inv) {
246
case 0:
247
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
248
goto do_setcond_high;
249
}
250
251
- i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
252
+ i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
253
op->args[4], cond);
254
switch (i ^ inv) {
255
case 0:
256
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
257
init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
258
copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
259
260
+ /* Pre-compute the type of the operation. */
261
+ if (def->flags & TCG_OPF_VECTOR) {
262
+ ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
263
+ } else if (def->flags & TCG_OPF_64BIT) {
264
+ ctx.type = TCG_TYPE_I64;
265
+ } else {
266
+ ctx.type = TCG_TYPE_I32;
267
+ }
268
+
269
/* For commutative operations make constant second argument */
270
switch (opc) {
271
CASE_OP_32_64_VEC(add):
272
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
273
/* Proceed with possible constant folding. */
274
break;
275
}
276
- if (opc == INDEX_op_sub_i32) {
277
+ switch (ctx.type) {
278
+ case TCG_TYPE_I32:
279
neg_op = INDEX_op_neg_i32;
280
have_neg = TCG_TARGET_HAS_neg_i32;
281
- } else if (opc == INDEX_op_sub_i64) {
282
+ break;
283
+ case TCG_TYPE_I64:
284
neg_op = INDEX_op_neg_i64;
285
have_neg = TCG_TARGET_HAS_neg_i64;
286
- } else if (TCG_TARGET_HAS_neg_vec) {
287
- TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
288
- unsigned vece = TCGOP_VECE(op);
289
- neg_op = INDEX_op_neg_vec;
290
- have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
291
- } else {
292
break;
293
+ case TCG_TYPE_V64:
294
+ case TCG_TYPE_V128:
295
+ case TCG_TYPE_V256:
296
+ neg_op = INDEX_op_neg_vec;
297
+ have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
298
+ TCGOP_VECE(op)) > 0;
299
+ break;
300
+ default:
301
+ g_assert_not_reached();
302
}
303
if (!have_neg) {
304
break;
305
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
306
TCGOpcode not_op;
307
bool have_not;
308
309
- if (def->flags & TCG_OPF_VECTOR) {
310
- not_op = INDEX_op_not_vec;
311
- have_not = TCG_TARGET_HAS_not_vec;
312
- } else if (def->flags & TCG_OPF_64BIT) {
313
- not_op = INDEX_op_not_i64;
314
- have_not = TCG_TARGET_HAS_not_i64;
315
- } else {
316
+ switch (ctx.type) {
317
+ case TCG_TYPE_I32:
318
not_op = INDEX_op_not_i32;
319
have_not = TCG_TARGET_HAS_not_i32;
320
+ break;
321
+ case TCG_TYPE_I64:
322
+ not_op = INDEX_op_not_i64;
323
+ have_not = TCG_TARGET_HAS_not_i64;
324
+ break;
325
+ case TCG_TYPE_V64:
326
+ case TCG_TYPE_V128:
327
+ case TCG_TYPE_V256:
328
+ not_op = INDEX_op_not_vec;
329
+ have_not = TCG_TARGET_HAS_not_vec;
330
+ break;
331
+ default:
332
+ g_assert_not_reached();
333
}
334
if (!have_not) {
335
break;
336
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
337
below, we can ignore high bits, but for further optimizations we
338
need to record that the high bits contain garbage. */
339
partmask = z_mask;
340
- if (!(def->flags & TCG_OPF_64BIT)) {
341
+ if (ctx.type == TCG_TYPE_I32) {
342
z_mask |= ~(tcg_target_ulong)0xffffffffu;
343
partmask &= 0xffffffffu;
344
affected &= 0xffffffffu;
345
--
88
--
346
2.25.1
89
2.34.1
347
90
348
91
1
Calls are special in that they have a variable number
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
of arguments, and need to be able to clobber globals.
2
and tcg_out_st_helper_args.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 63 ++++++++++++++++++++++++++++++++------------------
7
tcg/aarch64/tcg-target.c.inc | 40 +++++++++++++++---------------------
9
1 file changed, 41 insertions(+), 22 deletions(-)
8
1 file changed, 16 insertions(+), 24 deletions(-)
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/tcg/aarch64/tcg-target.c.inc
14
+++ b/tcg/optimize.c
13
+++ b/tcg/aarch64/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
16
}
15
}
17
}
16
}
18
17
19
+static bool fold_call(OptContext *ctx, TCGOp *op)
18
-static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
19
-{
20
- ptrdiff_t offset = tcg_pcrel_diff(s, target);
21
- tcg_debug_assert(offset == sextract64(offset, 0, 21));
22
- tcg_out_insn(s, 3406, ADR, rd, offset);
23
-}
24
-
25
typedef struct {
26
TCGReg base;
27
TCGReg index;
28
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
29
#endif
30
};
31
32
+static const TCGLdstHelperParam ldst_helper_param = {
33
+ .ntmp = 1, .tmp = { TCG_REG_TMP }
34
+};
35
+
36
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
37
{
38
- MemOpIdx oi = lb->oi;
39
- MemOp opc = get_memop(oi);
40
+ MemOp opc = get_memop(lb->oi);
41
42
if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
43
return false;
44
}
45
46
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
47
- tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
48
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
49
- tcg_out_adr(s, TCG_REG_X3, lb->raddr);
50
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
51
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
52
-
53
- tcg_out_movext(s, lb->type, lb->datalo_reg,
54
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0);
55
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
56
tcg_out_goto(s, lb->raddr);
57
return true;
58
}
59
60
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
61
{
62
- MemOpIdx oi = lb->oi;
63
- MemOp opc = get_memop(oi);
64
- MemOp size = opc & MO_SIZE;
65
+ MemOp opc = get_memop(lb->oi);
66
67
if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
68
return false;
69
}
70
71
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
72
- tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
73
- tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
74
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
75
- tcg_out_adr(s, TCG_REG_X4, lb->raddr);
76
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
77
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
78
tcg_out_goto(s, lb->raddr);
79
return true;
80
}
81
#else
82
+static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
20
+{
83
+{
21
+ TCGContext *s = ctx->tcg;
84
+ ptrdiff_t offset = tcg_pcrel_diff(s, target);
22
+ int nb_oargs = TCGOP_CALLO(op);
85
+ tcg_debug_assert(offset == sextract64(offset, 0, 21));
23
+ int nb_iargs = TCGOP_CALLI(op);
86
+ tcg_out_insn(s, 3406, ADR, rd, offset);
24
+ int flags, i;
25
+
26
+ init_arguments(ctx, op, nb_oargs + nb_iargs);
27
+ copy_propagate(ctx, op, nb_oargs, nb_iargs);
28
+
29
+ /* If the function reads or writes globals, reset temp data. */
30
+ flags = tcg_call_flags(op);
31
+ if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
32
+ int nb_globals = s->nb_globals;
33
+
34
+ for (i = 0; i < nb_globals; i++) {
35
+ if (test_bit(i, ctx->temps_used.l)) {
36
+ reset_ts(&ctx->tcg->temps[i]);
37
+ }
38
+ }
39
+ }
40
+
41
+ /* Reset temp data for outputs. */
42
+ for (i = 0; i < nb_oargs; i++) {
43
+ reset_temp(op->args[i]);
44
+ }
45
+
46
+ /* Stop optimizing MB across calls. */
47
+ ctx->prev_mb = NULL;
48
+ return true;
49
+}
87
+}
50
+
88
+
51
/* Propagate constants and copies, fold constant expressions. */
89
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
52
void tcg_optimize(TCGContext *s)
53
{
90
{
54
- int nb_temps, nb_globals, i;
91
if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
55
+ int nb_temps, i;
56
TCGOp *op, *op_next;
57
OptContext ctx = { .tcg = s };
58
59
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
60
available through the doubly linked circular list. */
61
62
nb_temps = s->nb_temps;
63
- nb_globals = s->nb_globals;
64
-
65
for (i = 0; i < nb_temps; ++i) {
66
s->temps[i].state_ptr = NULL;
67
}
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
uint64_t z_mask, partmask, affected, tmp;
70
int nb_oargs, nb_iargs;
71
TCGOpcode opc = op->opc;
72
- const TCGOpDef *def = &tcg_op_defs[opc];
73
+ const TCGOpDef *def;
74
75
- /* Count the arguments, and initialize the temps that are
76
- going to be used */
77
+ /* Calls are special. */
78
if (opc == INDEX_op_call) {
79
- nb_oargs = TCGOP_CALLO(op);
80
- nb_iargs = TCGOP_CALLI(op);
81
- } else {
82
- nb_oargs = def->nb_oargs;
83
- nb_iargs = def->nb_iargs;
84
+ fold_call(&ctx, op);
85
+ continue;
86
}
87
+
88
+ def = &tcg_op_defs[opc];
89
+ nb_oargs = def->nb_oargs;
90
+ nb_iargs = def->nb_iargs;
91
init_arguments(&ctx, op, nb_oargs + nb_iargs);
92
copy_propagate(&ctx, op, nb_oargs, nb_iargs);
93
94
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
95
if (def->flags & TCG_OPF_BB_END) {
96
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
97
} else {
98
- if (opc == INDEX_op_call &&
99
- !(tcg_call_flags(op)
100
- & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
101
- for (i = 0; i < nb_globals; i++) {
102
- if (test_bit(i, ctx.temps_used.l)) {
103
- reset_ts(&s->temps[i]);
104
- }
105
- }
106
- }
107
-
108
for (i = 0; i < nb_oargs; i++) {
109
reset_temp(op->args[i]);
110
/* Save the corresponding known-zero bits mask for the
111
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
112
case INDEX_op_qemu_st_i32:
113
case INDEX_op_qemu_st8_i32:
114
case INDEX_op_qemu_st_i64:
115
- case INDEX_op_call:
116
/* Opcodes that touch guest memory stop the optimization. */
117
ctx.prev_mb = NULL;
118
break;
119
--
92
--
120
2.25.1
93
2.34.1
121
94
122
95
1
The result is either 0 or 1, which means that we have
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
a 2 bit signed result, and thus 62 bits of sign.
2
and tcg_out_st_helper_args. This allows our local
3
For clarity, use the smask_from_zmask function.
3
tcg_out_arg_* infrastructure to be removed.
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 2 ++
8
tcg/arm/tcg-target.c.inc | 140 +++++----------------------------------
10
1 file changed, 2 insertions(+)
9
1 file changed, 18 insertions(+), 122 deletions(-)
11
10
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
13
--- a/tcg/arm/tcg-target.c.inc
15
+++ b/tcg/optimize.c
14
+++ b/tcg/arm/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ tcg_out_ldrd_rwb(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, TCGReg rm)
16
tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
17
}
18
19
-static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt,
20
- TCGReg rn, int imm8)
21
+static void __attribute__((unused))
22
+tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, int imm8)
23
{
24
tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
25
}
26
@@ -XXX,XX +XXX,XX @@ static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
27
tcg_out_dat_imm(s, COND_AL, ARITH_AND, rd, rn, 0xff);
28
}
29
30
-static void __attribute__((unused))
31
-tcg_out_ext8u_cond(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
32
-{
33
- tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
34
-}
35
-
36
static void tcg_out_ext16s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn)
37
{
38
/* sxth */
39
tcg_out32(s, 0x06bf0070 | (COND_AL << 28) | (rd << 12) | rn);
40
}
41
42
-static void tcg_out_ext16u_cond(TCGContext *s, ARMCond cond,
43
- TCGReg rd, TCGReg rn)
44
-{
45
- /* uxth */
46
- tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
47
-}
48
-
49
static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
50
{
51
- tcg_out_ext16u_cond(s, COND_AL, rd, rn);
52
+ /* uxth */
53
+ tcg_out32(s, 0x06ff0070 | (COND_AL << 28) | (rd << 12) | rn);
54
}
55
56
static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
57
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
58
#endif
59
};
60
61
-/* Helper routines for marshalling helper function arguments into
62
- * the correct registers and stack.
63
- * argreg is where we want to put this argument, arg is the argument itself.
64
- * Return value is the updated argreg ready for the next call.
65
- * Note that argreg 0..3 is real registers, 4+ on stack.
66
- *
67
- * We provide routines for arguments which are: immediate, 32 bit
68
- * value in register, 16 and 8 bit values in register (which must be zero
69
- * extended before use) and 64 bit value in a lo:hi register pair.
70
- */
71
-#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
72
-static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
73
-{ \
74
- if (argreg < 4) { \
75
- MOV_ARG(s, COND_AL, argreg, arg); \
76
- } else { \
77
- int ofs = (argreg - 4) * 4; \
78
- EXT_ARG; \
79
- tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
80
- tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
81
- } \
82
- return argreg + 1; \
83
-}
84
-
85
-DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
86
- (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
87
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u_cond,
88
- (tcg_out_ext8u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
89
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u_cond,
90
- (tcg_out_ext16u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
91
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
92
-
93
-static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
94
- TCGReg arglo, TCGReg arghi)
95
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
96
{
97
- /* 64 bit arguments must go in even/odd register pairs
98
- * and in 8-aligned stack slots.
99
- */
100
- if (argreg & 1) {
101
- argreg++;
102
- }
103
- if (argreg >= 4 && (arglo & 1) == 0 && arghi == arglo + 1) {
104
- tcg_out_strd_8(s, COND_AL, arglo,
105
- TCG_REG_CALL_STACK, (argreg - 4) * 4);
106
- return argreg + 2;
107
- } else {
108
- argreg = tcg_out_arg_reg32(s, argreg, arglo);
109
- argreg = tcg_out_arg_reg32(s, argreg, arghi);
110
- return argreg;
111
- }
112
+ /* We arrive at the slow path via "BLNE", so R14 contains l->raddr. */
113
+ return TCG_REG_R14;
114
}
115
116
+static const TCGLdstHelperParam ldst_helper_param = {
117
+ .ra_gen = ldst_ra_gen,
118
+ .ntmp = 1,
119
+ .tmp = { TCG_REG_TMP },
120
+};
121
+
122
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
123
{
124
- TCGReg argreg;
125
- MemOpIdx oi = lb->oi;
126
- MemOp opc = get_memop(oi);
127
+ MemOp opc = get_memop(lb->oi);
128
129
if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
130
return false;
17
}
131
}
18
132
19
ctx->z_mask = 1;
133
- argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
20
+ ctx->s_mask = smask_from_zmask(1);
134
- if (TARGET_LONG_BITS == 64) {
21
return false;
135
- argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
22
}
136
- } else {
23
137
- argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
24
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
138
- }
139
- argreg = tcg_out_arg_imm32(s, argreg, oi);
140
- argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
141
-
142
- /* Use the canonical unsigned helpers and minimize icache usage. */
143
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
144
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
145
-
146
- if ((opc & MO_SIZE) == MO_64) {
147
- TCGMovExtend ext[2] = {
148
- { .dst = lb->datalo_reg, .dst_type = TCG_TYPE_I32,
149
- .src = TCG_REG_R0, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
150
- { .dst = lb->datahi_reg, .dst_type = TCG_TYPE_I32,
151
- .src = TCG_REG_R1, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
152
- };
153
- tcg_out_movext2(s, &ext[0], &ext[1], TCG_REG_TMP);
154
- } else {
155
- tcg_out_movext(s, TCG_TYPE_I32, lb->datalo_reg,
156
- TCG_TYPE_I32, opc & MO_SSIZE, TCG_REG_R0);
157
- }
158
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
159
160
tcg_out_goto(s, COND_AL, lb->raddr);
161
return true;
162
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
163
164
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
165
{
166
- TCGReg argreg, datalo, datahi;
167
- MemOpIdx oi = lb->oi;
168
- MemOp opc = get_memop(oi);
169
+ MemOp opc = get_memop(lb->oi);
170
171
if (!reloc_pc24(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
172
return false;
25
}
173
}
26
174
27
ctx->z_mask = 1;
175
- argreg = TCG_REG_R0;
28
+ ctx->s_mask = smask_from_zmask(1);
176
- argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
29
return false;
177
- if (TARGET_LONG_BITS == 64) {
30
178
- argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
31
do_setcond_const:
179
- } else {
180
- argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
181
- }
182
-
183
- datalo = lb->datalo_reg;
184
- datahi = lb->datahi_reg;
185
- switch (opc & MO_SIZE) {
186
- case MO_8:
187
- argreg = tcg_out_arg_reg8(s, argreg, datalo);
188
- break;
189
- case MO_16:
190
- argreg = tcg_out_arg_reg16(s, argreg, datalo);
191
- break;
192
- case MO_32:
193
- default:
194
- argreg = tcg_out_arg_reg32(s, argreg, datalo);
195
- break;
196
- case MO_64:
197
- argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
198
- break;
199
- }
200
-
201
- argreg = tcg_out_arg_imm32(s, argreg, oi);
202
- argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
203
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
204
205
/* Tail-call to the helper, which will return to the fast path. */
206
tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & MO_SIZE]);
32
--
207
--
33
2.25.1
208
2.34.1
34
209
35
210
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
and tcg_out_st_helper_args.
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
6
---
5
tcg/optimize.c | 48 ++++++++++++++++++++++++++++++------------------
7
tcg/loongarch64/tcg-target.c.inc | 37 ++++++++++----------------------
6
1 file changed, 30 insertions(+), 18 deletions(-)
8
1 file changed, 11 insertions(+), 26 deletions(-)
7
9
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
9
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
12
--- a/tcg/loongarch64/tcg-target.c.inc
11
+++ b/tcg/optimize.c
13
+++ b/tcg/loongarch64/tcg-target.c.inc
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
13
return fold_const2(ctx, op);
15
return reloc_br_sd10k16(s->code_ptr - 1, target);
14
}
16
}
15
17
16
+static bool fold_extract(OptContext *ctx, TCGOp *op)
18
+static const TCGLdstHelperParam ldst_helper_param = {
17
+{
19
+ .ntmp = 1, .tmp = { TCG_REG_TMP0 }
18
+ if (arg_is_const(op->args[1])) {
20
+};
19
+ uint64_t t;
20
+
21
+
21
+ t = arg_info(op->args[1])->val;
22
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
22
+ t = extract64(t, op->args[2], op->args[3]);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
24
+ }
25
+ return false;
26
+}
27
+
28
static bool fold_extract2(OptContext *ctx, TCGOp *op)
29
{
23
{
30
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
24
- MemOpIdx oi = l->oi;
31
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
25
- MemOp opc = get_memop(oi);
32
return tcg_opt_gen_movi(ctx, op, op->args[0], i);
26
- MemOp size = opc & MO_SIZE;
27
+ MemOp opc = get_memop(l->oi);
28
29
/* resolve label address */
30
if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
31
return false;
32
}
33
34
- /* call load helper */
35
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
36
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
37
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi);
38
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr);
39
-
40
- tcg_out_call_int(s, qemu_ld_helpers[size], false);
41
-
42
- tcg_out_movext(s, l->type, l->datalo_reg,
43
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_A0);
44
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
45
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE], false);
46
+ tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
47
return tcg_out_goto(s, l->raddr);
33
}
48
}
34
49
35
+static bool fold_sextract(OptContext *ctx, TCGOp *op)
50
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
36
+{
37
+ if (arg_is_const(op->args[1])) {
38
+ uint64_t t;
39
+
40
+ t = arg_info(op->args[1])->val;
41
+ t = sextract64(t, op->args[2], op->args[3]);
42
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
43
+ }
44
+ return false;
45
+}
46
+
47
static bool fold_shift(OptContext *ctx, TCGOp *op)
48
{
51
{
49
return fold_const2(ctx, op);
52
- MemOpIdx oi = l->oi;
50
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
53
- MemOp opc = get_memop(oi);
51
}
54
- MemOp size = opc & MO_SIZE;
52
break;
55
+ MemOp opc = get_memop(l->oi);
53
56
54
- CASE_OP_32_64(extract):
57
/* resolve label address */
55
- if (arg_is_const(op->args[1])) {
58
if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
56
- tmp = extract64(arg_info(op->args[1])->val,
59
return false;
57
- op->args[2], op->args[3]);
60
}
58
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
61
59
- continue;
62
- /* call store helper */
60
- }
63
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
61
- break;
64
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
65
- tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I32 : TCG_TYPE_I32, TCG_REG_A2,
66
- l->type, size, l->datalo_reg);
67
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi);
68
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr);
62
-
69
-
63
- CASE_OP_32_64(sextract):
70
- tcg_out_call_int(s, qemu_st_helpers[size], false);
64
- if (arg_is_const(op->args[1])) {
65
- tmp = sextract64(arg_info(op->args[1])->val,
66
- op->args[2], op->args[3]);
67
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
68
- continue;
69
- }
70
- break;
71
-
71
-
72
default:
72
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
73
break;
73
+ tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
74
74
return tcg_out_goto(s, l->raddr);
75
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
75
}
76
CASE_OP_32_64(eqv):
76
#else
77
done = fold_eqv(&ctx, op);
78
break;
79
+ CASE_OP_32_64(extract):
80
+ done = fold_extract(&ctx, op);
81
+ break;
82
CASE_OP_32_64(extract2):
83
done = fold_extract2(&ctx, op);
84
break;
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
86
case INDEX_op_setcond2_i32:
87
done = fold_setcond2(&ctx, op);
88
break;
89
+ CASE_OP_32_64(sextract):
90
+ done = fold_sextract(&ctx, op);
91
+ break;
92
CASE_OP_32_64_VEC(sub):
93
done = fold_sub(&ctx, op);
94
break;
95
--
77
--
96
2.25.1
78
2.34.1
97
79
98
80
1
Even though there is only one user, place this more complex
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
conversion into its own helper.
2
and tcg_out_st_helper_args. This allows our local
3
3
tcg_out_arg_* infrastructure to be removed.
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
5
We are no longer filling the call or return branch
6
delay slots, nor are we tail-calling for the store,
7
but this seems a small price to pay.
8
9
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
11
---
7
tcg/optimize.c | 89 ++++++++++++++++++++++++++------------------------
12
tcg/mips/tcg-target.c.inc | 154 ++++++--------------------------------
8
1 file changed, 47 insertions(+), 42 deletions(-)
13
1 file changed, 22 insertions(+), 132 deletions(-)
9
14
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
17
--- a/tcg/mips/tcg-target.c.inc
13
+++ b/tcg/optimize.c
18
+++ b/tcg/mips/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static bool fold_nand(OptContext *ctx, TCGOp *op)
19
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
15
20
[MO_BEUQ] = helper_be_stq_mmu,
16
static bool fold_neg(OptContext *ctx, TCGOp *op)
21
};
22
23
-/* Helper routines for marshalling helper function arguments into
24
- * the correct registers and stack.
25
- * I is where we want to put this argument, and is updated and returned
26
- * for the next call. ARG is the argument itself.
27
- *
28
- * We provide routines for arguments which are: immediate, 32 bit
29
- * value in register, 16 and 8 bit values in register (which must be zero
30
- * extended before use) and 64 bit value in a lo:hi register pair.
31
- */
32
-
33
-static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg)
34
-{
35
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
36
- tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg);
37
- } else {
38
- /* For N32 and N64, the initial offset is different. But there
39
- we also have 8 argument register so we don't run out here. */
40
- tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
41
- tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i);
42
- }
43
- return i + 1;
44
-}
45
-
46
-static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg)
47
-{
48
- TCGReg tmp = TCG_TMP0;
49
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
50
- tmp = tcg_target_call_iarg_regs[i];
51
- }
52
- tcg_out_ext8u(s, tmp, arg);
53
- return tcg_out_call_iarg_reg(s, i, tmp);
54
-}
55
-
56
-static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg)
57
-{
58
- TCGReg tmp = TCG_TMP0;
59
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
60
- tmp = tcg_target_call_iarg_regs[i];
61
- }
62
- tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff);
63
- return tcg_out_call_iarg_reg(s, i, tmp);
64
-}
65
-
66
-static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg)
67
-{
68
- TCGReg tmp = TCG_TMP0;
69
- if (arg == 0) {
70
- tmp = TCG_REG_ZERO;
71
- } else {
72
- if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
73
- tmp = tcg_target_call_iarg_regs[i];
74
- }
75
- tcg_out_movi(s, TCG_TYPE_REG, tmp, arg);
76
- }
77
- return tcg_out_call_iarg_reg(s, i, tmp);
78
-}
79
-
80
-static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
81
-{
82
- tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
83
- i = (i + 1) & ~1;
84
- i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al));
85
- i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah));
86
- return i;
87
-}
88
+/* We have four temps, we might as well expose three of them. */
89
+static const TCGLdstHelperParam ldst_helper_param = {
90
+ .ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
91
+};
92
93
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
17
{
94
{
18
- return fold_const1(ctx, op);
95
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
19
+ if (fold_const1(ctx, op)) {
96
- MemOpIdx oi = l->oi;
20
+ return true;
97
- MemOp opc = get_memop(oi);
21
+ }
98
- TCGReg v0;
22
+ /*
99
- int i;
23
+ * Because of fold_sub_to_neg, we want to always return true,
100
+ MemOp opc = get_memop(l->oi);
24
+ * via finish_folding.
101
25
+ */
102
/* resolve label address */
26
+ finish_folding(ctx, op);
103
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
27
+ return true;
104
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
105
return false;
106
}
107
108
- i = 1;
109
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
110
- i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
111
- } else {
112
- i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
113
- }
114
- i = tcg_out_call_iarg_imm(s, i, oi);
115
- i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr);
116
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
117
+
118
tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
119
/* delay slot */
120
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
121
+ tcg_out_nop(s);
122
123
- v0 = l->datalo_reg;
124
- if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
125
- /* We eliminated V0 from the possible output registers, so it
126
- cannot be clobbered here. So we must move V1 first. */
127
- if (MIPS_BE) {
128
- tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1);
129
- v0 = l->datahi_reg;
130
- } else {
131
- tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1);
132
- }
133
- }
134
+ tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
135
136
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
137
if (!reloc_pc16(s->code_ptr - 1, l->raddr)) {
138
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
139
}
140
141
/* delay slot */
142
- if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) {
143
- /* we always sign-extend 32-bit loads */
144
- tcg_out_ext32s(s, v0, TCG_REG_V0);
145
- } else {
146
- tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO);
147
- }
148
+ tcg_out_nop(s);
149
return true;
28
}
150
}
29
151
30
static bool fold_nor(OptContext *ctx, TCGOp *op)
152
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
31
@@ -XXX,XX +XXX,XX @@ static bool fold_shift(OptContext *ctx, TCGOp *op)
153
{
32
return fold_const2(ctx, op);
154
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
33
}
155
- MemOpIdx oi = l->oi;
34
156
- MemOp opc = get_memop(oi);
35
+static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
157
- MemOp s_bits = opc & MO_SIZE;
36
+{
158
- int i;
37
+ TCGOpcode neg_op;
159
+ MemOp opc = get_memop(l->oi);
38
+ bool have_neg;
160
161
/* resolve label address */
162
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
163
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
164
return false;
165
}
166
167
- i = 1;
168
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
169
- i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
170
- } else {
171
- i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
172
- }
173
- switch (s_bits) {
174
- case MO_8:
175
- i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg);
176
- break;
177
- case MO_16:
178
- i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg);
179
- break;
180
- case MO_32:
181
- i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
182
- break;
183
- case MO_64:
184
- if (TCG_TARGET_REG_BITS == 32) {
185
- i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
186
- } else {
187
- i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
188
- }
189
- break;
190
- default:
191
- g_assert_not_reached();
192
- }
193
- i = tcg_out_call_iarg_imm(s, i, oi);
194
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
195
196
- /* Tail call to the store helper. Thus force the return address
197
- computation to take place in the return address register. */
198
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr);
199
- i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA);
200
- tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true);
201
+ tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], false);
202
/* delay slot */
203
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
204
+ tcg_out_nop(s);
39
+
205
+
40
+ if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
206
+ tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
207
+ if (!reloc_pc16(s->code_ptr - 1, l->raddr)) {
41
+ return false;
208
+ return false;
42
+ }
209
+ }
43
+
210
+
44
+ switch (ctx->type) {
211
+ /* delay slot */
45
+ case TCG_TYPE_I32:
212
+ tcg_out_nop(s);
46
+ neg_op = INDEX_op_neg_i32;
213
return true;
47
+ have_neg = TCG_TARGET_HAS_neg_i32;
214
}
48
+ break;
215
49
+ case TCG_TYPE_I64:
50
+ neg_op = INDEX_op_neg_i64;
51
+ have_neg = TCG_TARGET_HAS_neg_i64;
52
+ break;
53
+ case TCG_TYPE_V64:
54
+ case TCG_TYPE_V128:
55
+ case TCG_TYPE_V256:
56
+ neg_op = INDEX_op_neg_vec;
57
+ have_neg = (TCG_TARGET_HAS_neg_vec &&
58
+ tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
59
+ break;
60
+ default:
61
+ g_assert_not_reached();
62
+ }
63
+ if (have_neg) {
64
+ op->opc = neg_op;
65
+ op->args[1] = op->args[2];
66
+ return fold_neg(ctx, op);
67
+ }
68
+ return false;
69
+}
70
+
71
static bool fold_sub(OptContext *ctx, TCGOp *op)
72
{
73
if (fold_const2(ctx, op) ||
74
- fold_xx_to_i(ctx, op, 0)) {
75
+ fold_xx_to_i(ctx, op, 0) ||
76
+ fold_sub_to_neg(ctx, op)) {
77
return true;
78
}
79
return false;
80
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
81
continue;
82
}
83
break;
84
- CASE_OP_32_64_VEC(sub):
85
- {
86
- TCGOpcode neg_op;
87
- bool have_neg;
88
-
89
- if (arg_is_const(op->args[2])) {
90
- /* Proceed with possible constant folding. */
91
- break;
92
- }
93
- switch (ctx.type) {
94
- case TCG_TYPE_I32:
95
- neg_op = INDEX_op_neg_i32;
96
- have_neg = TCG_TARGET_HAS_neg_i32;
97
- break;
98
- case TCG_TYPE_I64:
99
- neg_op = INDEX_op_neg_i64;
100
- have_neg = TCG_TARGET_HAS_neg_i64;
101
- break;
102
- case TCG_TYPE_V64:
103
- case TCG_TYPE_V128:
104
- case TCG_TYPE_V256:
105
- neg_op = INDEX_op_neg_vec;
106
- have_neg = tcg_can_emit_vec_op(neg_op, ctx.type,
107
- TCGOP_VECE(op)) > 0;
108
- break;
109
- default:
110
- g_assert_not_reached();
111
- }
112
- if (!have_neg) {
113
- break;
114
- }
115
- if (arg_is_const(op->args[1])
116
- && arg_info(op->args[1])->val == 0) {
117
- op->opc = neg_op;
118
- reset_temp(op->args[0]);
119
- op->args[1] = op->args[2];
120
- continue;
121
- }
122
- }
123
- break;
124
default:
125
break;
126
}
127
--
216
--
128
2.25.1
217
2.34.1
129
218
130
219
1
Reduce some code duplication by folding the NE and EQ cases.
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
and tcg_out_st_helper_args.
2
3
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
---
7
---
7
tcg/optimize.c | 145 ++++++++++++++++++++++++-------------------------
8
tcg/ppc/tcg-target.c.inc | 88 ++++++++++++----------------------------
8
1 file changed, 72 insertions(+), 73 deletions(-)
9
1 file changed, 26 insertions(+), 62 deletions(-)
9
10
10
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
11
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
12
--- a/tcg/optimize.c
13
--- a/tcg/ppc/tcg-target.c.inc
13
+++ b/tcg/optimize.c
14
+++ b/tcg/ppc/tcg-target.c.inc
14
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
15
return fold_const2(ctx, op);
16
[MO_BEUQ] = helper_be_stq_mmu,
16
}
17
};
17
18
18
+static bool fold_setcond2(OptContext *ctx, TCGOp *op)
19
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
19
+{
20
+{
20
+ TCGCond cond = op->args[5];
21
+ if (arg < 0) {
21
+ int i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
22
+ arg = TCG_REG_TMP1;
22
+ int inv = 0;
23
+
24
+ if (i >= 0) {
25
+ goto do_setcond_const;
26
+ }
23
+ }
27
+
24
+ tcg_out32(s, MFSPR | RT(arg) | LR);
28
+ switch (cond) {
25
+ return arg;
29
+ case TCG_COND_LT:
30
+ case TCG_COND_GE:
31
+ /*
32
+ * Simplify LT/GE comparisons vs zero to a single compare
33
+ * vs the high word of the input.
34
+ */
35
+ if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
36
+ arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
37
+ goto do_setcond_high;
38
+ }
39
+ break;
40
+
41
+ case TCG_COND_NE:
42
+ inv = 1;
43
+ QEMU_FALLTHROUGH;
44
+ case TCG_COND_EQ:
45
+ /*
46
+ * Simplify EQ/NE comparisons where one of the pairs
47
+ * can be simplified.
48
+ */
49
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[1],
50
+ op->args[3], cond);
51
+ switch (i ^ inv) {
52
+ case 0:
53
+ goto do_setcond_const;
54
+ case 1:
55
+ goto do_setcond_high;
56
+ }
57
+
58
+ i = do_constant_folding_cond(INDEX_op_setcond_i32, op->args[2],
59
+ op->args[4], cond);
60
+ switch (i ^ inv) {
61
+ case 0:
62
+ goto do_setcond_const;
63
+ case 1:
64
+ op->args[2] = op->args[3];
65
+ op->args[3] = cond;
66
+ op->opc = INDEX_op_setcond_i32;
67
+ break;
68
+ }
69
+ break;
70
+
71
+ default:
72
+ break;
73
+
74
+ do_setcond_high:
75
+ op->args[1] = op->args[2];
76
+ op->args[2] = op->args[4];
77
+ op->args[3] = cond;
78
+ op->opc = INDEX_op_setcond_i32;
79
+ break;
80
+ }
81
+ return false;
82
+
83
+ do_setcond_const:
84
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
85
+}
26
+}
86
+
27
+
87
static bool fold_shift(OptContext *ctx, TCGOp *op)
28
+/*
29
+ * For the purposes of ppc32 sorting 4 input registers into 4 argument
30
+ * registers, there is an outside chance we would require 3 temps.
31
+ * Because of constraints, no inputs are in r3, and env will not be
32
+ * placed into r3 until after the sorting is done, and is thus free.
33
+ */
34
+static const TCGLdstHelperParam ldst_helper_param = {
35
+ .ra_gen = ldst_ra_gen,
36
+ .ntmp = 3,
37
+ .tmp = { TCG_REG_TMP1, TCG_REG_R0, TCG_REG_R3 }
38
+};
39
+
40
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
88
{
41
{
89
return fold_const2(ctx, op);
42
- MemOpIdx oi = lb->oi;
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
43
- MemOp opc = get_memop(oi);
91
}
44
- TCGReg hi, lo, arg = TCG_REG_R3;
92
break;
45
+ MemOp opc = get_memop(lb->oi);
93
46
94
- case INDEX_op_setcond2_i32:
47
if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
95
- i = do_constant_folding_cond2(&op->args[1], &op->args[3],
48
return false;
96
- op->args[5]);
49
}
97
- if (i >= 0) {
50
98
- do_setcond_const:
51
- tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
99
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
100
- continue;
101
- }
102
- if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
103
- && arg_is_const(op->args[3])
104
- && arg_info(op->args[3])->val == 0
105
- && arg_is_const(op->args[4])
106
- && arg_info(op->args[4])->val == 0) {
107
- /* Simplify LT/GE comparisons vs zero to a single compare
108
- vs the high word of the input. */
109
- do_setcond_high:
110
- reset_temp(op->args[0]);
111
- arg_info(op->args[0])->z_mask = 1;
112
- op->opc = INDEX_op_setcond_i32;
113
- op->args[1] = op->args[2];
114
- op->args[2] = op->args[4];
115
- op->args[3] = op->args[5];
116
- break;
117
- }
118
- if (op->args[5] == TCG_COND_EQ) {
119
- /* Simplify EQ comparisons where one of the pairs
120
- can be simplified. */
121
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
122
- op->args[1], op->args[3],
123
- TCG_COND_EQ);
124
- if (i == 0) {
125
- goto do_setcond_const;
126
- } else if (i > 0) {
127
- goto do_setcond_high;
128
- }
129
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
130
- op->args[2], op->args[4],
131
- TCG_COND_EQ);
132
- if (i == 0) {
133
- goto do_setcond_high;
134
- } else if (i < 0) {
135
- break;
136
- }
137
- do_setcond_low:
138
- reset_temp(op->args[0]);
139
- arg_info(op->args[0])->z_mask = 1;
140
- op->opc = INDEX_op_setcond_i32;
141
- op->args[2] = op->args[3];
142
- op->args[3] = op->args[5];
143
- break;
144
- }
145
- if (op->args[5] == TCG_COND_NE) {
146
- /* Simplify NE comparisons where one of the pairs
147
- can be simplified. */
148
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
149
- op->args[1], op->args[3],
150
- TCG_COND_NE);
151
- if (i == 0) {
152
- goto do_setcond_high;
153
- } else if (i > 0) {
154
- goto do_setcond_const;
155
- }
156
- i = do_constant_folding_cond(INDEX_op_setcond_i32,
157
- op->args[2], op->args[4],
158
- TCG_COND_NE);
159
- if (i == 0) {
160
- goto do_setcond_low;
161
- } else if (i > 0) {
162
- goto do_setcond_const;
163
- }
164
- }
165
- break;
166
-
52
-
167
default:
53
- lo = lb->addrlo_reg;
168
break;
54
- hi = lb->addrhi_reg;
169
55
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
170
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
56
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
171
CASE_OP_32_64(shr):
57
- tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
172
done = fold_shift(&ctx, op);
58
- tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
173
break;
59
- } else {
174
+ case INDEX_op_setcond2_i32:
60
- /* If the address needed to be zero-extended, we'll have already
175
+ done = fold_setcond2(&ctx, op);
61
- placed it in R4. The only remaining case is 64-bit guest. */
176
+ break;
62
- tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
177
CASE_OP_32_64_VEC(sub):
63
- }
178
done = fold_sub(&ctx, op);
64
-
179
break;
65
- tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
66
- tcg_out32(s, MFSPR | RT(arg) | LR);
67
-
68
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
69
tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
70
-
71
- lo = lb->datalo_reg;
72
- hi = lb->datahi_reg;
73
- if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
74
- tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
75
- tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
76
- } else {
77
- tcg_out_movext(s, lb->type, lo,
78
- TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_R3);
79
- }
80
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
81
82
tcg_out_b(s, 0, lb->raddr);
83
return true;
84
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
85
86
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
87
{
88
- MemOpIdx oi = lb->oi;
89
- MemOp opc = get_memop(oi);
90
- MemOp s_bits = opc & MO_SIZE;
91
- TCGReg hi, lo, arg = TCG_REG_R3;
92
+ MemOp opc = get_memop(lb->oi);
93
94
if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
95
return false;
96
}
97
98
- tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
99
-
100
- lo = lb->addrlo_reg;
101
- hi = lb->addrhi_reg;
102
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
103
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
104
- tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
105
- tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
106
- } else {
107
- /* If the address needed to be zero-extended, we'll have already
108
- placed it in R4. The only remaining case is 64-bit guest. */
109
- tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
110
- }
111
-
112
- lo = lb->datalo_reg;
113
- hi = lb->datahi_reg;
114
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
115
- arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
116
- tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
117
- tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
118
- } else {
119
- tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
120
- arg++, lb->type, s_bits, lo);
121
- }
122
-
123
- tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
124
- tcg_out32(s, MFSPR | RT(arg) | LR);
125
-
126
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
127
tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
128
129
tcg_out_b(s, 0, lb->raddr);
180
--
130
--
181
2.25.1
131
2.34.1
182
132
183
133
1
This will allow callers to tail call to these functions
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
and return true, indicating that processing is complete.
2
and tcg_out_st_helper_args.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 9 +++++----
8
tcg/riscv/tcg-target.c.inc | 37 ++++++++++---------------------------
10
1 file changed, 5 insertions(+), 4 deletions(-)
9
1 file changed, 10 insertions(+), 27 deletions(-)
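
As a sketch of what the bool return enables (names taken from the optimize.c
hunk below; the type selection is elided here), a routine that ends in a move
can now simply tail call the generator and propagate its result:

    static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                                 TCGArg dst, uint64_t val)
    {
        TCGType type = TCG_TYPE_I64;   /* sketch only: the real code derives this */
        TCGTemp *tv = tcg_constant_internal(type, val);

        init_ts_info(ctx, tv);
        /* Tail call: completion is reported by the callee. */
        return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
    }
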
11
10
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
13
--- a/tcg/riscv/tcg-target.c.inc
15
+++ b/tcg/optimize.c
14
+++ b/tcg/riscv/tcg-target.c.inc
16
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
15
@@ -XXX,XX +XXX,XX @@ static void tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
17
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
16
tcg_debug_assert(ok);
18
}
17
}
19
18
20
-static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
19
+/* We have three temps, we might as well expose them. */
21
+static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
20
+static const TCGLdstHelperParam ldst_helper_param = {
21
+ .ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
22
+};
23
+
24
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
22
{
25
{
23
TCGTemp *dst_ts = arg_temp(dst);
26
- MemOpIdx oi = l->oi;
24
TCGTemp *src_ts = arg_temp(src);
27
- MemOp opc = get_memop(oi);
25
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
28
- TCGReg a0 = tcg_target_call_iarg_regs[0];
26
29
- TCGReg a1 = tcg_target_call_iarg_regs[1];
27
if (ts_are_copies(dst_ts, src_ts)) {
30
- TCGReg a2 = tcg_target_call_iarg_regs[2];
28
tcg_op_remove(ctx->tcg, op);
31
- TCGReg a3 = tcg_target_call_iarg_regs[3];
29
- return;
32
+ MemOp opc = get_memop(l->oi);
30
+ return true;
33
34
/* resolve label address */
35
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
36
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
31
}
37
}
32
38
33
reset_ts(dst_ts);
39
/* call load helper */
34
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
40
- tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
35
di->is_const = si->is_const;
41
- tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
36
di->val = si->val;
42
- tcg_out_movi(s, TCG_TYPE_PTR, a2, oi);
43
- tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr);
44
-
45
+ tcg_out_ld_helper_args(s, l, &ldst_helper_param);
46
tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
47
- tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0);
48
+ tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
49
50
tcg_out_goto(s, l->raddr);
51
return true;
52
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
53
54
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
55
{
56
- MemOpIdx oi = l->oi;
57
- MemOp opc = get_memop(oi);
58
- MemOp s_bits = opc & MO_SIZE;
59
- TCGReg a0 = tcg_target_call_iarg_regs[0];
60
- TCGReg a1 = tcg_target_call_iarg_regs[1];
61
- TCGReg a2 = tcg_target_call_iarg_regs[2];
62
- TCGReg a3 = tcg_target_call_iarg_regs[3];
63
- TCGReg a4 = tcg_target_call_iarg_regs[4];
64
+ MemOp opc = get_memop(l->oi);
65
66
/* resolve label address */
67
if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
68
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
37
}
69
}
38
+ return true;
70
39
}
71
/* call store helper */
40
72
- tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
41
-static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
73
- tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
42
+static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
74
- tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, a2,
43
TCGArg dst, uint64_t val)
75
- l->type, s_bits, l->datalo_reg);
44
{
76
- tcg_out_movi(s, TCG_TYPE_PTR, a3, oi);
45
const TCGOpDef *def = &tcg_op_defs[op->opc];
77
- tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr);
46
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
78
-
47
/* Convert movi to mov with constant temp. */
79
+ tcg_out_st_helper_args(s, l, &ldst_helper_param);
48
tv = tcg_constant_internal(type, val);
80
tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
49
init_ts_info(ctx, tv);
81
50
- tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
82
tcg_out_goto(s, l->raddr);
51
+ return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
52
}
53
54
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
55
--
83
--
56
2.25.1
84
2.34.1
57
85
58
86
1
Adjust the interface to take the OptContext parameter instead
1
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
2
of TCGContext or both.
2
and tcg_out_st_helper_args.
3
3
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 67 +++++++++++++++++++++++++-------------------------
7
tcg/s390x/tcg-target.c.inc | 35 ++++++++++-------------------------
9
1 file changed, 34 insertions(+), 33 deletions(-)
8
1 file changed, 10 insertions(+), 25 deletions(-)
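
In outline (a sketch based on the optimize.c hunks below), the OptContext
gains a back-pointer to the TCGContext so the helpers no longer need both
passed separately:

    typedef struct OptContext {
        TCGContext *tcg;          /* lets helpers call tcg_op_remove() etc. */
        TCGTempSet temps_used;
    } OptContext;

    /* inside tcg_optimize(TCGContext *s): */
    OptContext ctx = { .tcg = s };
    tcg_opt_gen_movi(&ctx, op, op->args[0], 0);   /* was (s, &ctx, op, ...) */
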
10
9
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
12
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
12
--- a/tcg/s390x/tcg-target.c.inc
14
+++ b/tcg/optimize.c
13
+++ b/tcg/s390x/tcg-target.c.inc
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
14
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
16
} TempOptInfo;
17
18
typedef struct OptContext {
19
+ TCGContext *tcg;
20
TCGTempSet temps_used;
21
} OptContext;
22
23
@@ -XXX,XX +XXX,XX @@ static bool args_are_copies(TCGArg arg1, TCGArg arg2)
24
return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
25
}
15
}
26
16
27
-static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
17
#if defined(CONFIG_SOFTMMU)
28
+static void tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
18
+static const TCGLdstHelperParam ldst_helper_param = {
19
+ .ntmp = 1, .tmp = { TCG_TMP0 }
20
+};
21
+
22
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
29
{
23
{
30
TCGTemp *dst_ts = arg_temp(dst);
24
- TCGReg addr_reg = lb->addrlo_reg;
31
TCGTemp *src_ts = arg_temp(src);
25
- TCGReg data_reg = lb->datalo_reg;
32
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
26
- MemOpIdx oi = lb->oi;
33
TCGOpcode new_op;
27
- MemOp opc = get_memop(oi);
34
28
+ MemOp opc = get_memop(lb->oi);
35
if (ts_are_copies(dst_ts, src_ts)) {
29
36
- tcg_op_remove(s, op);
30
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
37
+ tcg_op_remove(ctx->tcg, op);
31
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
38
return;
32
return false;
39
}
33
}
40
34
41
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
35
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
36
- if (TARGET_LONG_BITS == 64) {
37
- tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
38
- }
39
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
40
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
41
- tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
42
- tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
43
+ tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
44
+ tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
45
+ tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
46
47
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
48
return true;
49
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
50
51
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
52
{
53
- TCGReg addr_reg = lb->addrlo_reg;
54
- TCGReg data_reg = lb->datalo_reg;
55
- MemOpIdx oi = lb->oi;
56
- MemOp opc = get_memop(oi);
57
- MemOp size = opc & MO_SIZE;
58
+ MemOp opc = get_memop(lb->oi);
59
60
if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
61
(intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
62
return false;
42
}
63
}
43
}
64
44
65
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
45
-static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
66
- if (TARGET_LONG_BITS == 64) {
46
- TCGOp *op, TCGArg dst, uint64_t val)
67
- tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
47
+static void tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
68
- }
48
+ TCGArg dst, uint64_t val)
69
- tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
49
{
70
- TCG_REG_R4, lb->type, size, data_reg);
50
const TCGOpDef *def = &tcg_op_defs[op->opc];
71
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
51
TCGType type;
72
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
52
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_movi(TCGContext *s, OptContext *ctx,
73
+ tcg_out_st_helper_args(s, lb, &ldst_helper_param);
53
/* Convert movi to mov with constant temp. */
74
tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
54
tv = tcg_constant_internal(type, val);
75
55
init_ts_info(ctx, tv);
76
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
56
- tcg_opt_gen_mov(s, op, dst, temp_arg(tv));
57
+ tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
58
}
59
60
static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
{
63
int nb_temps, nb_globals, i;
64
TCGOp *op, *op_next, *prev_mb = NULL;
65
- OptContext ctx = {};
66
+ OptContext ctx = { .tcg = s };
67
68
/* Array VALS has an element for each temp.
69
If this temp holds a constant then its value is kept in VALS' element.
70
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
71
CASE_OP_32_64(rotr):
72
if (arg_is_const(op->args[1])
73
&& arg_info(op->args[1])->val == 0) {
74
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
76
continue;
77
}
78
break;
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
80
if (!arg_is_const(op->args[1])
81
&& arg_is_const(op->args[2])
82
&& arg_info(op->args[2])->val == 0) {
83
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
84
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
85
continue;
86
}
87
break;
88
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
89
if (!arg_is_const(op->args[1])
90
&& arg_is_const(op->args[2])
91
&& arg_info(op->args[2])->val == -1) {
92
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
93
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
94
continue;
95
}
96
break;
97
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
98
99
if (partmask == 0) {
100
tcg_debug_assert(nb_oargs == 1);
101
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
102
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
103
continue;
104
}
105
if (affected == 0) {
106
tcg_debug_assert(nb_oargs == 1);
107
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
108
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
109
continue;
110
}
111
112
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
CASE_OP_32_64(mulsh):
114
if (arg_is_const(op->args[2])
115
&& arg_info(op->args[2])->val == 0) {
116
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
117
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
118
continue;
119
}
120
break;
121
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
122
CASE_OP_32_64_VEC(or):
123
CASE_OP_32_64_VEC(and):
124
if (args_are_copies(op->args[1], op->args[2])) {
125
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
126
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
127
continue;
128
}
129
break;
130
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
131
CASE_OP_32_64_VEC(sub):
132
CASE_OP_32_64_VEC(xor):
133
if (args_are_copies(op->args[1], op->args[2])) {
134
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], 0);
135
+ tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
136
continue;
137
}
138
break;
139
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
140
allocator where needed and possible. Also detect copies. */
141
switch (opc) {
142
CASE_OP_32_64_VEC(mov):
143
- tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
144
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
145
continue;
146
147
case INDEX_op_dup_vec:
148
if (arg_is_const(op->args[1])) {
149
tmp = arg_info(op->args[1])->val;
150
tmp = dup_const(TCGOP_VECE(op), tmp);
151
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
152
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
153
continue;
154
}
155
break;
156
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
157
case INDEX_op_dup2_vec:
158
assert(TCG_TARGET_REG_BITS == 32);
159
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
160
- tcg_opt_gen_movi(s, &ctx, op, op->args[0],
161
+ tcg_opt_gen_movi(&ctx, op, op->args[0],
162
deposit64(arg_info(op->args[1])->val, 32, 32,
163
arg_info(op->args[2])->val));
164
continue;
165
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
166
case INDEX_op_extrh_i64_i32:
167
if (arg_is_const(op->args[1])) {
168
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
169
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
170
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
171
continue;
172
}
173
break;
174
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
175
if (arg_is_const(op->args[1])) {
176
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
177
op->args[2]);
178
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
179
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
180
continue;
181
}
182
break;
183
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
184
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
185
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
186
arg_info(op->args[2])->val);
187
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
188
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
189
continue;
190
}
191
break;
192
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
193
TCGArg v = arg_info(op->args[1])->val;
194
if (v != 0) {
195
tmp = do_constant_folding(opc, v, 0);
196
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
197
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
198
} else {
199
- tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
200
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[2]);
201
}
202
continue;
203
}
204
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
205
tmp = deposit64(arg_info(op->args[1])->val,
206
op->args[3], op->args[4],
207
arg_info(op->args[2])->val);
208
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
209
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
210
continue;
211
}
212
break;
213
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
214
if (arg_is_const(op->args[1])) {
215
tmp = extract64(arg_info(op->args[1])->val,
216
op->args[2], op->args[3]);
217
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
218
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
219
continue;
220
}
221
break;
222
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
223
if (arg_is_const(op->args[1])) {
224
tmp = sextract64(arg_info(op->args[1])->val,
225
op->args[2], op->args[3]);
226
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
227
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
228
continue;
229
}
230
break;
231
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
232
tmp = (int32_t)(((uint32_t)v1 >> shr) |
233
((uint32_t)v2 << (32 - shr)));
234
}
235
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
236
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
237
continue;
238
}
239
break;
240
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
241
tmp = do_constant_folding_cond(opc, op->args[1],
242
op->args[2], op->args[3]);
243
if (tmp != 2) {
244
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
245
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
246
continue;
247
}
248
break;
249
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
250
tmp = do_constant_folding_cond(opc, op->args[1],
251
op->args[2], op->args[5]);
252
if (tmp != 2) {
253
- tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
254
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
255
continue;
256
}
257
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
258
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
259
260
rl = op->args[0];
261
rh = op->args[1];
262
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
263
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
264
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
265
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
266
continue;
267
}
268
break;
269
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
270
271
rl = op->args[0];
272
rh = op->args[1];
273
- tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
274
- tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
275
+ tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
276
+ tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
277
continue;
278
}
279
break;
280
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
281
op->args[5]);
282
if (tmp != 2) {
283
do_setcond_const:
284
- tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
285
+ tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
286
continue;
287
}
288
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
289
--
77
--
290
2.25.1
78
2.34.1
291
79
292
80
1
Recognize the constant function for or-complement.
1
The softmmu tlb uses TCG_REG_TMP[0-2], not any of the normally available
2
registers. Now that we handle overlap between inputs and helper arguments,
3
we can allow any allocatable reg.
2
4
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 1 +
8
tcg/loongarch64/tcg-target-con-set.h | 2 --
9
1 file changed, 1 insertion(+)
9
tcg/loongarch64/tcg-target-con-str.h | 1 -
10
tcg/loongarch64/tcg-target.c.inc | 23 ++++-------------------
11
3 files changed, 4 insertions(+), 22 deletions(-)
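
The observation behind the new fold: orc computes a | ~b, so with identical
operands the result is always all ones. A sketch of the updated hook,
matching the one-line addition in the optimize.c hunk below:

    static bool fold_orc(OptContext *ctx, TCGOp *op)
    {
        if (fold_const2(ctx, op) ||
            fold_xx_to_i(ctx, op, -1) ||    /* orc x,x -> movi -1 */
            fold_xi_to_x(ctx, op, -1) ||
            fold_ix_to_not(ctx, op, 0)) {
            return true;
        }
        return false;
    }
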
10
12
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/tcg/loongarch64/tcg-target-con-set.h b/tcg/loongarch64/tcg-target-con-set.h
12
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
15
--- a/tcg/loongarch64/tcg-target-con-set.h
14
+++ b/tcg/optimize.c
16
+++ b/tcg/loongarch64/tcg-target-con-set.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_or(OptContext *ctx, TCGOp *op)
17
@@ -XXX,XX +XXX,XX @@
16
static bool fold_orc(OptContext *ctx, TCGOp *op)
18
C_O0_I1(r)
19
C_O0_I2(rZ, r)
20
C_O0_I2(rZ, rZ)
21
-C_O0_I2(LZ, L)
22
C_O1_I1(r, r)
23
-C_O1_I1(r, L)
24
C_O1_I2(r, r, rC)
25
C_O1_I2(r, r, ri)
26
C_O1_I2(r, r, rI)
27
diff --git a/tcg/loongarch64/tcg-target-con-str.h b/tcg/loongarch64/tcg-target-con-str.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/tcg/loongarch64/tcg-target-con-str.h
30
+++ b/tcg/loongarch64/tcg-target-con-str.h
31
@@ -XXX,XX +XXX,XX @@
32
* REGS(letter, register_mask)
33
*/
34
REGS('r', ALL_GENERAL_REGS)
35
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
36
37
/*
38
* Define constraint letters for constants:
39
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
40
index XXXXXXX..XXXXXXX 100644
41
--- a/tcg/loongarch64/tcg-target.c.inc
42
+++ b/tcg/loongarch64/tcg-target.c.inc
43
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
44
#define TCG_CT_CONST_C12 0x1000
45
#define TCG_CT_CONST_WSZ 0x2000
46
47
-#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
48
-/*
49
- * For softmmu, we need to avoid conflicts with the first 5
50
- * argument registers to call the helper. Some of these are
51
- * also used for the tlb lookup.
52
- */
53
-#ifdef CONFIG_SOFTMMU
54
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5)
55
-#else
56
-#define SOFTMMU_RESERVE_REGS 0
57
-#endif
58
-
59
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
60
61
static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
17
{
62
{
18
if (fold_const2(ctx, op) ||
63
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
19
+ fold_xx_to_i(ctx, op, -1) ||
64
case INDEX_op_st32_i64:
20
fold_xi_to_x(ctx, op, -1) ||
65
case INDEX_op_st_i32:
21
fold_ix_to_not(ctx, op, 0)) {
66
case INDEX_op_st_i64:
22
return true;
67
+ case INDEX_op_qemu_st_i32:
68
+ case INDEX_op_qemu_st_i64:
69
return C_O0_I2(rZ, r);
70
71
case INDEX_op_brcond_i32:
72
case INDEX_op_brcond_i64:
73
return C_O0_I2(rZ, rZ);
74
75
- case INDEX_op_qemu_st_i32:
76
- case INDEX_op_qemu_st_i64:
77
- return C_O0_I2(LZ, L);
78
-
79
case INDEX_op_ext8s_i32:
80
case INDEX_op_ext8s_i64:
81
case INDEX_op_ext8u_i32:
82
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
83
case INDEX_op_ld32u_i64:
84
case INDEX_op_ld_i32:
85
case INDEX_op_ld_i64:
86
- return C_O1_I1(r, r);
87
-
88
case INDEX_op_qemu_ld_i32:
89
case INDEX_op_qemu_ld_i64:
90
- return C_O1_I1(r, L);
91
+ return C_O1_I1(r, r);
92
93
case INDEX_op_andc_i32:
94
case INDEX_op_andc_i64:
23
--
95
--
24
2.25.1
96
2.34.1
25
97
26
98
1
Pull the "op r, 0, b => movi r, 0" optimization into a function,
1
While performing the load in the delay slot of the call to the common
2
and use it in fold_shift.
2
bswap helper function is cute, it is not worth the added complexity.
3
3
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
6
---
8
tcg/optimize.c | 28 ++++++++++------------------
7
tcg/mips/tcg-target.h | 4 +-
9
1 file changed, 10 insertions(+), 18 deletions(-)
8
tcg/mips/tcg-target.c.inc | 284 ++++++--------------------------------
9
2 files changed, 48 insertions(+), 240 deletions(-)
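
The extracted helper reads as below (copied in outline from the optimize.c
hunk that follows); for shifts and rotates, a zero first operand makes the
whole operation the constant zero:

    /* If the binary operation has first argument @i, fold to @i. */
    static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
    {
        if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
            return tcg_opt_gen_movi(ctx, op, op->args[0], i);
        }
        return false;
    }
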
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/tcg/mips/tcg-target.h
14
+++ b/tcg/optimize.c
14
+++ b/tcg/mips/tcg-target.h
15
@@ -XXX,XX +XXX,XX @@ static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
15
@@ -XXX,XX +XXX,XX @@ extern bool use_mips32r2_instructions;
16
return false;
16
#define TCG_TARGET_HAS_ext16u_i64 0 /* andi rt, rs, 0xffff */
17
#endif
18
19
-#define TCG_TARGET_DEFAULT_MO (0)
20
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
21
+#define TCG_TARGET_DEFAULT_MO 0
22
+#define TCG_TARGET_HAS_MEMORY_BSWAP 0
23
24
#define TCG_TARGET_NEED_LDST_LABELS
25
26
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
27
index XXXXXXX..XXXXXXX 100644
28
--- a/tcg/mips/tcg-target.c.inc
29
+++ b/tcg/mips/tcg-target.c.inc
30
@@ -XXX,XX +XXX,XX @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg,
17
}
31
}
18
32
19
+/* If the binary operation has first argument @i, fold to @i. */
33
#if defined(CONFIG_SOFTMMU)
20
+static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
34
-static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
21
+{
35
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
22
+ if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
36
[MO_UB] = helper_ret_ldub_mmu,
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
37
[MO_SB] = helper_ret_ldsb_mmu,
24
+ }
38
- [MO_LEUW] = helper_le_lduw_mmu,
25
+ return false;
39
- [MO_LESW] = helper_le_ldsw_mmu,
26
+}
40
- [MO_LEUL] = helper_le_ldul_mmu,
27
+
41
- [MO_LEUQ] = helper_le_ldq_mmu,
28
/* If the binary operation has first argument @i, fold to NOT. */
42
- [MO_BEUW] = helper_be_lduw_mmu,
29
static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
43
- [MO_BESW] = helper_be_ldsw_mmu,
44
- [MO_BEUL] = helper_be_ldul_mmu,
45
- [MO_BEUQ] = helper_be_ldq_mmu,
46
-#if TCG_TARGET_REG_BITS == 64
47
- [MO_LESL] = helper_le_ldsl_mmu,
48
- [MO_BESL] = helper_be_ldsl_mmu,
49
+#if HOST_BIG_ENDIAN
50
+ [MO_UW] = helper_be_lduw_mmu,
51
+ [MO_SW] = helper_be_ldsw_mmu,
52
+ [MO_UL] = helper_be_ldul_mmu,
53
+ [MO_SL] = helper_be_ldsl_mmu,
54
+ [MO_UQ] = helper_be_ldq_mmu,
55
+#else
56
+ [MO_UW] = helper_le_lduw_mmu,
57
+ [MO_SW] = helper_le_ldsw_mmu,
58
+ [MO_UL] = helper_le_ldul_mmu,
59
+ [MO_UQ] = helper_le_ldq_mmu,
60
+ [MO_SL] = helper_le_ldsl_mmu,
61
#endif
62
};
63
64
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
65
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
66
[MO_UB] = helper_ret_stb_mmu,
67
- [MO_LEUW] = helper_le_stw_mmu,
68
- [MO_LEUL] = helper_le_stl_mmu,
69
- [MO_LEUQ] = helper_le_stq_mmu,
70
- [MO_BEUW] = helper_be_stw_mmu,
71
- [MO_BEUL] = helper_be_stl_mmu,
72
- [MO_BEUQ] = helper_be_stq_mmu,
73
+#if HOST_BIG_ENDIAN
74
+ [MO_UW] = helper_be_stw_mmu,
75
+ [MO_UL] = helper_be_stl_mmu,
76
+ [MO_UQ] = helper_be_stq_mmu,
77
+#else
78
+ [MO_UW] = helper_le_stw_mmu,
79
+ [MO_UL] = helper_le_stl_mmu,
80
+ [MO_UQ] = helper_le_stq_mmu,
81
+#endif
82
};
83
84
/* We have four temps, we might as well expose three of them. */
85
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
86
87
tcg_out_ld_helper_args(s, l, &ldst_helper_param);
88
89
- tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
90
+ tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
91
/* delay slot */
92
tcg_out_nop(s);
93
94
@@ -XXX,XX +XXX,XX @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
95
96
tcg_out_st_helper_args(s, l, &ldst_helper_param);
97
98
- tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], false);
99
+ tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
100
/* delay slot */
101
tcg_out_nop(s);
102
103
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
104
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
105
TCGReg base, MemOp opc, TCGType type)
30
{
106
{
31
@@ -XXX,XX +XXX,XX @@ static bool fold_sextract(OptContext *ctx, TCGOp *op)
107
- switch (opc & (MO_SSIZE | MO_BSWAP)) {
32
static bool fold_shift(OptContext *ctx, TCGOp *op)
108
+ switch (opc & MO_SSIZE) {
33
{
109
case MO_UB:
34
if (fold_const2(ctx, op) ||
110
tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
35
+ fold_ix_to_i(ctx, op, 0) ||
111
break;
36
fold_xi_to_x(ctx, op, 0)) {
112
case MO_SB:
37
return true;
113
tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
38
}
114
break;
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
115
- case MO_UW | MO_BSWAP:
40
break;
116
- tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
41
}
117
- tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
42
118
- break;
43
- /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
119
case MO_UW:
44
- and "sub r, 0, a => neg r, a" case. */
120
tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
45
- switch (opc) {
121
break;
46
- CASE_OP_32_64(shl):
122
- case MO_SW | MO_BSWAP:
47
- CASE_OP_32_64(shr):
123
- tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
48
- CASE_OP_32_64(sar):
124
- tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OS);
49
- CASE_OP_32_64(rotl):
125
- break;
50
- CASE_OP_32_64(rotr):
126
case MO_SW:
51
- if (arg_is_const(op->args[1])
127
tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
52
- && arg_info(op->args[1])->val == 0) {
128
break;
53
- tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
129
- case MO_UL | MO_BSWAP:
54
- continue;
130
- if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
131
- if (use_mips32r2_instructions) {
132
- tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
133
- tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
134
- } else {
135
- tcg_out_bswap_subr(s, bswap32u_addr);
136
- /* delay slot */
137
- tcg_out_opc_imm(s, OPC_LWU, TCG_TMP0, base, 0);
138
- tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
55
- }
139
- }
56
- break;
140
- break;
57
- default:
141
- }
142
- /* FALLTHRU */
143
- case MO_SL | MO_BSWAP:
144
- if (use_mips32r2_instructions) {
145
- tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
146
- tcg_out_bswap32(s, lo, lo, 0);
147
- } else {
148
- tcg_out_bswap_subr(s, bswap32_addr);
149
- /* delay slot */
150
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
151
- tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_TMP3);
152
- }
153
- break;
154
case MO_UL:
155
if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
156
tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
157
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
158
case MO_SL:
159
tcg_out_opc_imm(s, OPC_LW, lo, base, 0);
160
break;
161
- case MO_UQ | MO_BSWAP:
162
- if (TCG_TARGET_REG_BITS == 64) {
163
- if (use_mips32r2_instructions) {
164
- tcg_out_opc_imm(s, OPC_LD, lo, base, 0);
165
- tcg_out_bswap64(s, lo, lo);
166
- } else {
167
- tcg_out_bswap_subr(s, bswap64_addr);
168
- /* delay slot */
169
- tcg_out_opc_imm(s, OPC_LD, TCG_TMP0, base, 0);
170
- tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
171
- }
172
- } else if (use_mips32r2_instructions) {
173
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
174
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP1, base, 4);
175
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
176
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
177
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
178
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
179
- } else {
180
- tcg_out_bswap_subr(s, bswap32_addr);
181
- /* delay slot */
182
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 0);
183
- tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, base, 4);
184
- tcg_out_bswap_subr(s, bswap32_addr);
185
- /* delay slot */
186
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3);
187
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3);
188
- }
189
- break;
190
case MO_UQ:
191
/* Prefer to load from offset 0 first, but allow for overlap. */
192
if (TCG_TARGET_REG_BITS == 64) {
193
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
194
const MIPSInsn lw2 = MIPS_BE ? OPC_LWR : OPC_LWL;
195
const MIPSInsn ld1 = MIPS_BE ? OPC_LDL : OPC_LDR;
196
const MIPSInsn ld2 = MIPS_BE ? OPC_LDR : OPC_LDL;
197
+ bool sgn = opc & MO_SIGN;
198
199
- bool sgn = (opc & MO_SIGN);
200
-
201
- switch (opc & (MO_SSIZE | MO_BSWAP)) {
202
- case MO_SW | MO_BE:
203
- case MO_UW | MO_BE:
204
- tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0);
205
- tcg_out_opc_imm(s, OPC_LBU, lo, base, 1);
206
- if (use_mips32r2_instructions) {
207
- tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8);
208
- } else {
209
- tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
210
- tcg_out_opc_reg(s, OPC_OR, lo, TCG_TMP0, TCG_TMP1);
211
- }
212
- break;
213
-
214
- case MO_SW | MO_LE:
215
- case MO_UW | MO_LE:
216
- if (use_mips32r2_instructions && lo != base) {
217
+ switch (opc & MO_SIZE) {
218
+ case MO_16:
219
+ if (HOST_BIG_ENDIAN) {
220
+ tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 0);
221
+ tcg_out_opc_imm(s, OPC_LBU, lo, base, 1);
222
+ if (use_mips32r2_instructions) {
223
+ tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8);
224
+ } else {
225
+ tcg_out_opc_sa(s, OPC_SLL, TCG_TMP0, TCG_TMP0, 8);
226
+ tcg_out_opc_reg(s, OPC_OR, lo, lo, TCG_TMP0);
227
+ }
228
+ } else if (use_mips32r2_instructions && lo != base) {
229
tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
230
tcg_out_opc_imm(s, sgn ? OPC_LB : OPC_LBU, TCG_TMP0, base, 1);
231
tcg_out_opc_bf(s, OPC_INS, lo, TCG_TMP0, 31, 8);
232
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
233
}
234
break;
235
236
- case MO_SL:
237
- case MO_UL:
238
+ case MO_32:
239
tcg_out_opc_imm(s, lw1, lo, base, 0);
240
tcg_out_opc_imm(s, lw2, lo, base, 3);
241
if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn) {
242
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
243
}
244
break;
245
246
- case MO_UL | MO_BSWAP:
247
- case MO_SL | MO_BSWAP:
248
- if (use_mips32r2_instructions) {
249
- tcg_out_opc_imm(s, lw1, lo, base, 0);
250
- tcg_out_opc_imm(s, lw2, lo, base, 3);
251
- tcg_out_bswap32(s, lo, lo,
252
- TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64
253
- ? (sgn ? TCG_BSWAP_OS : TCG_BSWAP_OZ) : 0);
254
- } else {
255
- const tcg_insn_unit *subr =
256
- (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn
257
- ? bswap32u_addr : bswap32_addr);
258
-
259
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0);
260
- tcg_out_bswap_subr(s, subr);
261
- /* delay slot */
262
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 3);
263
- tcg_out_mov(s, type, lo, TCG_TMP3);
264
- }
265
- break;
266
-
267
- case MO_UQ:
268
+ case MO_64:
269
if (TCG_TARGET_REG_BITS == 64) {
270
tcg_out_opc_imm(s, ld1, lo, base, 0);
271
tcg_out_opc_imm(s, ld2, lo, base, 7);
272
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
273
}
274
break;
275
276
- case MO_UQ | MO_BSWAP:
277
- if (TCG_TARGET_REG_BITS == 64) {
278
- if (use_mips32r2_instructions) {
279
- tcg_out_opc_imm(s, ld1, lo, base, 0);
280
- tcg_out_opc_imm(s, ld2, lo, base, 7);
281
- tcg_out_bswap64(s, lo, lo);
282
- } else {
283
- tcg_out_opc_imm(s, ld1, TCG_TMP0, base, 0);
284
- tcg_out_bswap_subr(s, bswap64_addr);
285
- /* delay slot */
286
- tcg_out_opc_imm(s, ld2, TCG_TMP0, base, 7);
287
- tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
288
- }
289
- } else if (use_mips32r2_instructions) {
290
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0);
291
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3);
292
- tcg_out_opc_imm(s, lw1, TCG_TMP1, base, 4 + 0);
293
- tcg_out_opc_imm(s, lw2, TCG_TMP1, base, 4 + 3);
294
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, TCG_TMP0);
295
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, TCG_TMP1);
296
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? lo : hi, TCG_TMP0, 16);
297
- tcg_out_opc_sa(s, OPC_ROTR, MIPS_BE ? hi : lo, TCG_TMP1, 16);
298
- } else {
299
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0 + 0);
300
- tcg_out_bswap_subr(s, bswap32_addr);
301
- /* delay slot */
302
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 0 + 3);
303
- tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 4 + 0);
304
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? lo : hi, TCG_TMP3);
305
- tcg_out_bswap_subr(s, bswap32_addr);
306
- /* delay slot */
307
- tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 4 + 3);
308
- tcg_out_mov(s, TCG_TYPE_I32, MIPS_BE ? hi : lo, TCG_TMP3);
309
- }
310
- break;
311
-
312
default:
313
g_assert_not_reached();
314
}
315
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
316
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
317
TCGReg base, MemOp opc)
318
{
319
- /* Don't clutter the code below with checks to avoid bswapping ZERO. */
320
- if ((lo | hi) == 0) {
321
- opc &= ~MO_BSWAP;
322
- }
323
-
324
- switch (opc & (MO_SIZE | MO_BSWAP)) {
325
+ switch (opc & MO_SIZE) {
326
case MO_8:
327
tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
328
break;
329
-
330
- case MO_16 | MO_BSWAP:
331
- tcg_out_bswap16(s, TCG_TMP1, lo, 0);
332
- lo = TCG_TMP1;
333
- /* FALLTHRU */
334
case MO_16:
335
tcg_out_opc_imm(s, OPC_SH, lo, base, 0);
336
break;
337
-
338
- case MO_32 | MO_BSWAP:
339
- tcg_out_bswap32(s, TCG_TMP3, lo, 0);
340
- lo = TCG_TMP3;
341
- /* FALLTHRU */
342
case MO_32:
343
tcg_out_opc_imm(s, OPC_SW, lo, base, 0);
344
break;
345
-
346
- case MO_64 | MO_BSWAP:
347
- if (TCG_TARGET_REG_BITS == 64) {
348
- tcg_out_bswap64(s, TCG_TMP3, lo);
349
- tcg_out_opc_imm(s, OPC_SD, TCG_TMP3, base, 0);
350
- } else if (use_mips32r2_instructions) {
351
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? lo : hi);
352
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? hi : lo);
353
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
354
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
355
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP0, base, 0);
356
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP1, base, 4);
357
- } else {
358
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0);
359
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 0);
360
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0);
361
- tcg_out_opc_imm(s, OPC_SW, TCG_TMP3, base, 4);
362
- }
363
- break;
364
case MO_64:
365
if (TCG_TARGET_REG_BITS == 64) {
366
tcg_out_opc_imm(s, OPC_SD, lo, base, 0);
367
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi,
368
tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4);
369
}
370
break;
371
-
372
default:
373
g_assert_not_reached();
374
}
375
@@ -XXX,XX +XXX,XX @@ static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
376
const MIPSInsn sd1 = MIPS_BE ? OPC_SDL : OPC_SDR;
377
const MIPSInsn sd2 = MIPS_BE ? OPC_SDR : OPC_SDL;
378
379
- /* Don't clutter the code below with checks to avoid bswapping ZERO. */
380
- if ((lo | hi) == 0) {
381
- opc &= ~MO_BSWAP;
382
- }
383
-
384
- switch (opc & (MO_SIZE | MO_BSWAP)) {
385
- case MO_16 | MO_BE:
386
+ switch (opc & MO_SIZE) {
387
+ case MO_16:
388
tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8);
389
- tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 0);
390
- tcg_out_opc_imm(s, OPC_SB, lo, base, 1);
391
+ tcg_out_opc_imm(s, OPC_SB, HOST_BIG_ENDIAN ? TCG_TMP0 : lo, base, 0);
392
+ tcg_out_opc_imm(s, OPC_SB, HOST_BIG_ENDIAN ? lo : TCG_TMP0, base, 1);
393
break;
394
395
- case MO_16 | MO_LE:
396
- tcg_out_opc_sa(s, OPC_SRL, TCG_TMP0, lo, 8);
397
- tcg_out_opc_imm(s, OPC_SB, lo, base, 0);
398
- tcg_out_opc_imm(s, OPC_SB, TCG_TMP0, base, 1);
399
- break;
400
-
401
- case MO_32 | MO_BSWAP:
402
- tcg_out_bswap32(s, TCG_TMP3, lo, 0);
403
- lo = TCG_TMP3;
404
- /* fall through */
405
case MO_32:
406
tcg_out_opc_imm(s, sw1, lo, base, 0);
407
tcg_out_opc_imm(s, sw2, lo, base, 3);
408
break;
409
410
- case MO_64 | MO_BSWAP:
411
- if (TCG_TARGET_REG_BITS == 64) {
412
- tcg_out_bswap64(s, TCG_TMP3, lo);
413
- lo = TCG_TMP3;
414
- } else if (use_mips32r2_instructions) {
415
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP0, 0, MIPS_BE ? hi : lo);
416
- tcg_out_opc_reg(s, OPC_WSBH, TCG_TMP1, 0, MIPS_BE ? lo : hi);
417
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP0, TCG_TMP0, 16);
418
- tcg_out_opc_sa(s, OPC_ROTR, TCG_TMP1, TCG_TMP1, 16);
419
- hi = MIPS_BE ? TCG_TMP0 : TCG_TMP1;
420
- lo = MIPS_BE ? TCG_TMP1 : TCG_TMP0;
421
- } else {
422
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? lo : hi, 0);
423
- tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 0 + 0);
424
- tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 0 + 3);
425
- tcg_out_bswap32(s, TCG_TMP3, MIPS_BE ? hi : lo, 0);
426
- tcg_out_opc_imm(s, sw1, TCG_TMP3, base, 4 + 0);
427
- tcg_out_opc_imm(s, sw2, TCG_TMP3, base, 4 + 3);
58
- break;
428
- break;
59
- }
429
- }
60
-
430
- /* fall through */
61
/* Simplify using known-zero bits. Currently only ops with a single
431
case MO_64:
62
output argument is supported. */
432
if (TCG_TARGET_REG_BITS == 64) {
63
z_mask = -1;
433
tcg_out_opc_imm(s, sd1, lo, base, 0);
64
--
434
--
65
2.25.1
435
2.34.1
66
436
67
437
1
This "garbage" setting pre-dates the addition of the type
1
Compare the address vs the tlb entry with sign-extended values.
2
changing opcodes INDEX_op_ext_i32_i64, INDEX_op_extu_i32_i64,
2
This simplifies the page+alignment mask constant, and the
3
and INDEX_op_extr{l,h}_i64_i32.
3
generation of the last byte address for the misaligned test.
4
4
5
So now we have definitive points at which to adjust z_mask
5
Move the tlb addend load up, and the zero-extension down.
6
to eliminate such bits from the 32-bit operands.
6
7
This frees up a register, which allows us to use TMP3 as the returned base
8
address register instead of A0, which we were using as a 5th temporary.
7
9
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
10
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
12
---
12
tcg/optimize.c | 35 ++++++++++++++++-------------------
13
tcg/mips/tcg-target.c.inc | 38 ++++++++++++++++++--------------------
13
1 file changed, 16 insertions(+), 19 deletions(-)
14
1 file changed, 18 insertions(+), 20 deletions(-)
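
The convention change in a nutshell, sketched from the tcg_opt_gen_movi hunk
below: 32-bit constants are canonicalized to their sign-extended form before
the constant temp is created, matching how such constants are represented
elsewhere in tcg:

    static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
                                 TCGArg dst, uint64_t val)
    {
        TCGTemp *tv;

        if (ctx->type == TCG_TYPE_I32) {
            val = (int32_t)val;     /* keep 32-bit constants sign-extended */
        }

        /* Convert movi to mov with constant temp. */
        tv = tcg_constant_internal(ctx->type, val);
        init_ts_info(ctx, tv);
        return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
    }
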
14
15
15
diff --git a/tcg/optimize.c b/tcg/optimize.c
16
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/optimize.c
18
--- a/tcg/mips/tcg-target.c.inc
18
+++ b/tcg/optimize.c
19
+++ b/tcg/mips/tcg-target.c.inc
19
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
20
@@ -XXX,XX +XXX,XX @@ typedef enum {
20
ti->is_const = true;
21
ALIAS_PADDI = sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
21
ti->val = ts->val;
22
ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
22
ti->z_mask = ts->val;
23
? OPC_SRL : OPC_DSRL,
23
- if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
24
+ ALIAS_TADDI = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
24
- /* High bits of a 32-bit quantity are garbage. */
25
+ ? OPC_ADDIU : OPC_DADDIU,
25
- ti->z_mask |= ~0xffffffffull;
26
} MIPSInsn;
26
- }
27
28
/*
29
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
30
int add_off = offsetof(CPUTLBEntry, addend);
31
int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
32
: offsetof(CPUTLBEntry, addr_write);
33
- target_ulong tlb_mask;
34
35
ldst = new_ldst_label(s);
36
ldst->is_ld = is_ld;
37
ldst->oi = oi;
38
ldst->addrlo_reg = addrlo;
39
ldst->addrhi_reg = addrhi;
40
- base = TCG_REG_A0;
41
42
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
43
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
44
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
45
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
46
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
27
} else {
47
} else {
28
ti->is_const = false;
48
- tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
29
ti->z_mask = -1;
49
- : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
30
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
50
- TCG_TMP0, TCG_TMP3, cmp_off);
31
TCGTemp *src_ts = arg_temp(src);
51
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_TMP3, cmp_off);
32
TempOptInfo *di;
52
}
33
TempOptInfo *si;
53
34
- uint64_t z_mask;
54
- /* Zero extend a 32-bit guest address for a 64-bit host. */
35
TCGOpcode new_op;
55
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
36
56
- tcg_out_ext32u(s, base, addrlo);
37
if (ts_are_copies(dst_ts, src_ts)) {
57
- addrlo = base;
38
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
58
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
39
op->args[0] = dst;
59
+ /* Load the tlb addend for the fast path. */
40
op->args[1] = src;
60
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
41
61
}
42
- z_mask = si->z_mask;
43
- if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
44
- /* High bits of the destination are now garbage. */
45
- z_mask |= ~0xffffffffull;
46
- }
47
- di->z_mask = z_mask;
48
+ di->z_mask = si->z_mask;
49
50
if (src_ts->type == dst_ts->type) {
51
TempOptInfo *ni = ts_info(si->next_copy);
52
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
53
static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
54
TCGArg dst, uint64_t val)
55
{
56
- /* Convert movi to mov with constant temp. */
57
- TCGTemp *tv = tcg_constant_internal(ctx->type, val);
58
+ TCGTemp *tv;
59
60
+ if (ctx->type == TCG_TYPE_I32) {
61
+ val = (int32_t)val;
62
+ }
63
+
64
+ /* Convert movi to mov with constant temp. */
65
+ tv = tcg_constant_internal(ctx->type, val);
66
init_ts_info(ctx, tv);
67
return tcg_opt_gen_mov(ctx, op, dst, temp_arg(tv));
68
}
69
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
70
uint64_t z_mask = ctx->z_mask;
71
62
72
/*
63
/*
73
- * 32-bit ops generate 32-bit results. For the result is zero test
64
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
74
- * below, we can ignore high bits, but for further optimizations we
65
* For unaligned accesses, compare against the end of the access to
75
- * need to record that the high bits contain garbage.
66
* verify that it does not cross a page boundary.
76
+ * 32-bit ops generate 32-bit results, which for the purpose of
77
+ * simplifying tcg are sign-extended. Certainly that's how we
78
+ * represent our constants elsewhere. Note that the bits will
79
+ * be reset properly for a 64-bit value when encountering the
80
+ * type changing opcodes.
81
*/
67
*/
82
if (ctx->type == TCG_TYPE_I32) {
68
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
83
- ctx->z_mask |= MAKE_64BIT_MASK(32, 32);
69
- tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
84
- a_mask &= MAKE_64BIT_MASK(0, 32);
70
- if (a_mask >= s_mask) {
85
- z_mask &= MAKE_64BIT_MASK(0, 32);
71
- tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
86
+ a_mask = (int32_t)a_mask;
72
- } else {
87
+ z_mask = (int32_t)z_mask;
73
- tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrlo, s_mask - a_mask);
88
+ ctx->z_mask = z_mask;
74
+ tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
75
+ if (a_mask < s_mask) {
76
+ tcg_out_opc_imm(s, ALIAS_TADDI, TCG_TMP2, addrlo, s_mask - a_mask);
77
tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
78
+ } else {
79
+ tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrlo);
89
}
80
}
90
81
91
if (z_mask == 0) {
82
- if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
83
- /* Load the tlb addend for the fast path. */
84
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
85
+ /* Zero extend a 32-bit guest address for a 64-bit host. */
86
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
87
+ tcg_out_ext32u(s, TCG_TMP2, addrlo);
88
+ addrlo = TCG_TMP2;
89
}
90
91
ldst->label_ptr[0] = s->code_ptr;
92
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
93
tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
94
95
/* Load the tlb addend for the fast path. */
96
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
97
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
98
99
ldst->label_ptr[1] = s->code_ptr;
100
tcg_out_opc_br(s, OPC_BNE, addrhi, TCG_TMP0);
101
}
102
103
/* delay slot */
104
- tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrlo);
105
+ base = TCG_TMP3;
106
+ tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP3, addrlo);
107
#else
108
if (a_mask && (use_mips32r6_instructions || a_bits != s_bits)) {
109
ldst = new_ldst_label(s);
92
--
110
--
93
2.25.1
111
2.34.1
94
112
95
113
1
This will expose the variable to subroutines that
1
The softmmu tlb uses TCG_REG_TMP[0-3], not any of the normally available
2
will be broken out of tcg_optimize.
2
registers. Now that we handle overlap between inputs and helper arguments,
3
and have eliminated use of A0, we can allow any allocatable reg.
3
4
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
7
---
9
tcg/optimize.c | 11 ++++++-----
8
tcg/mips/tcg-target-con-set.h | 13 +++++--------
10
1 file changed, 6 insertions(+), 5 deletions(-)
9
tcg/mips/tcg-target-con-str.h | 2 --
10
tcg/mips/tcg-target.c.inc | 30 ++++++++----------------------
11
3 files changed, 13 insertions(+), 32 deletions(-)
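
The moved state, as it appears in the optimize.c hunk below; keeping the
pending barrier in the context (rather than a local in tcg_optimize) is what
lets the barrier-merging code be split out later:

    typedef struct OptContext {
        TCGContext *tcg;
        TCGOp *prev_mb;           /* last memory barrier seen, for merging */
        TCGTempSet temps_used;
    } OptContext;

The merging logic then reads and clears ctx.prev_mb instead of the old
prev_mb local.
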
11
12
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
--- a/tcg/mips/tcg-target-con-set.h
15
+++ b/tcg/optimize.c
16
+++ b/tcg/mips/tcg-target-con-set.h
16
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
17
@@ -XXX,XX +XXX,XX @@
17
18
C_O0_I1(r)
18
typedef struct OptContext {
19
C_O0_I2(rZ, r)
19
TCGContext *tcg;
20
C_O0_I2(rZ, rZ)
20
+ TCGOp *prev_mb;
21
-C_O0_I2(SZ, S)
21
TCGTempSet temps_used;
22
-C_O0_I3(SZ, S, S)
22
} OptContext;
23
-C_O0_I3(SZ, SZ, S)
23
24
+C_O0_I3(rZ, r, r)
24
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
25
+C_O0_I3(rZ, rZ, r)
25
void tcg_optimize(TCGContext *s)
26
C_O0_I4(rZ, rZ, rZ, rZ)
27
-C_O0_I4(SZ, SZ, S, S)
28
-C_O1_I1(r, L)
29
+C_O0_I4(rZ, rZ, r, r)
30
C_O1_I1(r, r)
31
C_O1_I2(r, 0, rZ)
32
-C_O1_I2(r, L, L)
33
+C_O1_I2(r, r, r)
34
C_O1_I2(r, r, ri)
35
C_O1_I2(r, r, rI)
36
C_O1_I2(r, r, rIK)
37
@@ -XXX,XX +XXX,XX @@ C_O1_I2(r, rZ, rN)
38
C_O1_I2(r, rZ, rZ)
39
C_O1_I4(r, rZ, rZ, rZ, 0)
40
C_O1_I4(r, rZ, rZ, rZ, rZ)
41
-C_O2_I1(r, r, L)
42
-C_O2_I2(r, r, L, L)
43
+C_O2_I1(r, r, r)
44
C_O2_I2(r, r, r, r)
45
C_O2_I4(r, r, rZ, rZ, rN, rN)
46
diff --git a/tcg/mips/tcg-target-con-str.h b/tcg/mips/tcg-target-con-str.h
47
index XXXXXXX..XXXXXXX 100644
48
--- a/tcg/mips/tcg-target-con-str.h
49
+++ b/tcg/mips/tcg-target-con-str.h
50
@@ -XXX,XX +XXX,XX @@
51
* REGS(letter, register_mask)
52
*/
53
REGS('r', ALL_GENERAL_REGS)
54
-REGS('L', ALL_QLOAD_REGS)
55
-REGS('S', ALL_QSTORE_REGS)
56
57
/*
58
* Define constraint letters for constants:
59
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
60
index XXXXXXX..XXXXXXX 100644
61
--- a/tcg/mips/tcg-target.c.inc
62
+++ b/tcg/mips/tcg-target.c.inc
63
@@ -XXX,XX +XXX,XX @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
64
#define TCG_CT_CONST_WSZ 0x2000 /* word size */
65
66
#define ALL_GENERAL_REGS 0xffffffffu
67
-#define NOA0_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_A0))
68
-
69
-#ifdef CONFIG_SOFTMMU
70
-#define ALL_QLOAD_REGS \
71
- (NOA0_REGS & ~((TCG_TARGET_REG_BITS < TARGET_LONG_BITS) << TCG_REG_A2))
72
-#define ALL_QSTORE_REGS \
73
- (NOA0_REGS & ~(TCG_TARGET_REG_BITS < TARGET_LONG_BITS \
74
- ? (1 << TCG_REG_A2) | (1 << TCG_REG_A3) \
75
- : (1 << TCG_REG_A1)))
76
-#else
77
-#define ALL_QLOAD_REGS NOA0_REGS
78
-#define ALL_QSTORE_REGS NOA0_REGS
79
-#endif
80
-
81
82
static bool is_p2m1(tcg_target_long val)
26
{
83
{
27
int nb_temps, nb_globals, i;
84
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
28
- TCGOp *op, *op_next, *prev_mb = NULL;
85
29
+ TCGOp *op, *op_next;
86
case INDEX_op_qemu_ld_i32:
30
OptContext ctx = { .tcg = s };
87
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
31
88
- ? C_O1_I1(r, L) : C_O1_I2(r, L, L));
32
/* Array VALS has an element for each temp.
89
+ ? C_O1_I1(r, r) : C_O1_I2(r, r, r));
33
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
90
case INDEX_op_qemu_st_i32:
34
}
91
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
35
92
- ? C_O0_I2(SZ, S) : C_O0_I3(SZ, S, S));
36
/* Eliminate duplicate and redundant fence instructions. */
93
+ ? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r));
37
- if (prev_mb) {
94
case INDEX_op_qemu_ld_i64:
38
+ if (ctx.prev_mb) {
95
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
39
switch (opc) {
96
- : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, L)
40
case INDEX_op_mb:
97
- : C_O2_I2(r, r, L, L));
41
/* Merge two barriers of the same type into one,
98
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
42
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
99
+ : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
43
* barrier. This is stricter than specified but for
100
+ : C_O2_I2(r, r, r, r));
44
* the purposes of TCG is better than not optimizing.
101
case INDEX_op_qemu_st_i64:
45
*/
102
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(SZ, S)
46
- prev_mb->args[0] |= op->args[0];
103
- : TARGET_LONG_BITS == 32 ? C_O0_I3(SZ, SZ, S)
47
+ ctx.prev_mb->args[0] |= op->args[0];
104
- : C_O0_I4(SZ, SZ, S, S));
48
tcg_op_remove(s, op);
105
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r)
49
break;
106
+ : TARGET_LONG_BITS == 32 ? C_O0_I3(rZ, rZ, r)
50
107
+ : C_O0_I4(rZ, rZ, r, r));
51
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
108
52
case INDEX_op_qemu_st_i64:
109
default:
53
case INDEX_op_call:
110
g_assert_not_reached();
54
/* Opcodes that touch guest memory stop the optimization. */
55
- prev_mb = NULL;
56
+ ctx.prev_mb = NULL;
57
break;
58
}
59
} else if (opc == INDEX_op_mb) {
60
- prev_mb = op;
61
+ ctx.prev_mb = op;
62
}
63
}
64
}
65
--
111
--
66
2.25.1
112
2.34.1
67
113
68
114
1
Certain targets, like riscv, produce signed 32-bit results.
1
Allocate TCG_REG_TMP2. Use R0, TMP1, TMP2 instead of any of
2
This can lead to lots of redundant extensions as values are
2
the normally allocated registers for the tlb load.
3
manipulated.
4
5
Begin by tracking only the obvious sign-extensions, and
6
converting them to simple copies when possible.
7
3
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
9
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
7
---
12
tcg/optimize.c | 123 ++++++++++++++++++++++++++++++++++++++++---------
8
tcg/ppc/tcg-target.c.inc | 78 ++++++++++++++++++++++++----------------
13
1 file changed, 102 insertions(+), 21 deletions(-)
9
1 file changed, 47 insertions(+), 31 deletions(-)
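
The central piece of new bookkeeping, taken in outline from the optimize.c
hunk below, is a per-temp s_mask: a left-aligned mask of the bits known to be
copies of the sign bit. For a constant it falls straight out of clrsb64():

    /* Calculate the smask for a specific value. */
    static uint64_t smask_from_value(uint64_t value)
    {
        int rep = clrsb64(value);
        return ~(~0ull >> rep);
    }

A non-constant temp starts with s_mask = 0 (nothing known), and the fold
routines can then turn a sign-extension whose input already satisfies the
mask into a simple copy.
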
14
10
15
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
16
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
17
--- a/tcg/optimize.c
13
--- a/tcg/ppc/tcg-target.c.inc
18
+++ b/tcg/optimize.c
14
+++ b/tcg/ppc/tcg-target.c.inc
19
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
15
@@ -XXX,XX +XXX,XX @@
20
TCGTemp *next_copy;
16
#else
21
uint64_t val;
17
# define TCG_REG_TMP1 TCG_REG_R12
22
uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
18
#endif
23
+ uint64_t s_mask; /* a left-aligned mask of clrsb(value) bits. */
19
+#define TCG_REG_TMP2 TCG_REG_R11
24
} TempOptInfo;
20
25
21
#define TCG_VEC_TMP1 TCG_REG_V0
26
typedef struct OptContext {
22
#define TCG_VEC_TMP2 TCG_REG_V1
27
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
23
@@ -XXX,XX +XXX,XX @@ static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
28
/* In flight values from optimization. */
24
/*
29
uint64_t a_mask; /* mask bit is 0 iff value identical to first input */
25
* For the purposes of ppc32 sorting 4 input registers into 4 argument
30
uint64_t z_mask; /* mask bit is 0 iff value bit is 0 */
26
* registers, there is an outside chance we would require 3 temps.
31
+ uint64_t s_mask; /* mask of clrsb(value) bits */
27
- * Because of constraints, no inputs are in r3, and env will not be
32
TCGType type;
28
- * placed into r3 until after the sorting is done, and is thus free.
33
} OptContext;
29
*/
34
30
static const TCGLdstHelperParam ldst_helper_param = {
35
+/* Calculate the smask for a specific value. */
31
.ra_gen = ldst_ra_gen,
36
+static uint64_t smask_from_value(uint64_t value)
32
.ntmp = 3,
37
+{
33
- .tmp = { TCG_REG_TMP1, TCG_REG_R0, TCG_REG_R3 }
38
+ int rep = clrsb64(value);
34
+ .tmp = { TCG_REG_TMP1, TCG_REG_TMP2, TCG_REG_R0 }
39
+ return ~(~0ull >> rep);
35
};
40
+}
36
41
+
37
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
42
+/*
38
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
43
+ * Calculate the smask for a given set of known-zeros.
39
/* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
44
+ * If there are lots of zeros on the left, we can consider the remainder
40
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
45
+ * an unsigned field, and thus the corresponding signed field is one bit
41
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
46
+ * larger.
42
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
47
+ */
43
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
48
+static uint64_t smask_from_zmask(uint64_t zmask)
44
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
49
+{
45
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
50
+ /*
46
51
+ * Only the 0 bits are significant for zmask, thus the msb itself
47
/* Extract the page index, shifted into place for tlb index. */
52
+ * must be zero, else we have no sign information.
48
if (TCG_TARGET_REG_BITS == 32) {
53
+ */
49
- tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
54
+ int rep = clz64(zmask);
50
+ tcg_out_shri32(s, TCG_REG_R0, addrlo,
55
+ if (rep == 0) {
51
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
56
+ return 0;
57
+ }
58
+ rep -= 1;
59
+ return ~(~0ull >> rep);
60
+}
61
+
62
static inline TempOptInfo *ts_info(TCGTemp *ts)
63
{
64
return ts->state_ptr;
65
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
66
ti->prev_copy = ts;
67
ti->is_const = false;
68
ti->z_mask = -1;
69
+ ti->s_mask = 0;
70
}
71
72
static void reset_temp(TCGArg arg)
73
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
74
ti->is_const = true;
75
ti->val = ts->val;
76
ti->z_mask = ts->val;
77
+ ti->s_mask = smask_from_value(ts->val);
78
} else {
52
} else {
79
ti->is_const = false;
53
- tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
80
ti->z_mask = -1;
54
+ tcg_out_shri64(s, TCG_REG_R0, addrlo,
81
+ ti->s_mask = 0;
55
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
82
}
56
}
83
}
57
- tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
84
58
+ tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
85
@@ -XXX,XX +XXX,XX @@ static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
59
86
op->args[1] = src;
60
- /* Load the TLB comparator. */
87
61
+ /* Load the (low part) TLB comparator into TMP2. */
88
di->z_mask = si->z_mask;
62
if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
89
+ di->s_mask = si->s_mask;
63
uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
90
64
? LWZUX : LDUX);
91
if (src_ts->type == dst_ts->type) {
65
- tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
92
TempOptInfo *ni = ts_info(si->next_copy);
66
+ tcg_out32(s, lxu | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
93
@@ -XXX,XX +XXX,XX @@ static void finish_folding(OptContext *ctx, TCGOp *op)
67
} else {
94
68
- tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
95
nb_oargs = def->nb_oargs;
69
+ tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
96
for (i = 0; i < nb_oargs; i++) {
70
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
97
- reset_temp(op->args[i]);
71
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
98
+ TCGTemp *ts = arg_temp(op->args[i]);
72
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
99
+ reset_ts(ts);
73
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2,
100
/*
74
+ TCG_REG_TMP1, cmp_off + 4 * HOST_BIG_ENDIAN);
101
- * Save the corresponding known-zero bits mask for the
75
} else {
102
+ * Save the corresponding known-zero/sign bits mask for the
76
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
103
* first output argument (only one supported so far).
77
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
104
*/
105
if (i == 0) {
106
- arg_info(op->args[i])->z_mask = ctx->z_mask;
107
+ ts_info(ts)->z_mask = ctx->z_mask;
108
+ ts_info(ts)->s_mask = ctx->s_mask;
109
}
78
}
110
}
79
}
111
}
80
112
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
81
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
113
{
82
* Load the TLB addend for use on the fast path.
114
uint64_t a_mask = ctx->a_mask;
83
* Do this asap to minimize any load use delay.
115
uint64_t z_mask = ctx->z_mask;
84
*/
116
+ uint64_t s_mask = ctx->s_mask;
85
- h->base = TCG_REG_R3;
117
86
- tcg_out_ld(s, TCG_TYPE_PTR, h->base, TCG_REG_R3,
118
/*
87
- offsetof(CPUTLBEntry, addend));
119
* 32-bit ops generate 32-bit results, which for the purpose of
88
+ if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
120
@@ -XXX,XX +XXX,XX @@ static bool fold_masks(OptContext *ctx, TCGOp *op)
89
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
121
if (ctx->type == TCG_TYPE_I32) {
90
+ offsetof(CPUTLBEntry, addend));
122
a_mask = (int32_t)a_mask;
91
+ }
123
z_mask = (int32_t)z_mask;
92
124
+ s_mask |= MAKE_64BIT_MASK(32, 32);
93
- /* Clear the non-page, non-alignment bits from the address */
125
ctx->z_mask = z_mask;
94
+ /* Clear the non-page, non-alignment bits from the address in R0. */
126
+ ctx->s_mask = s_mask;
95
if (TCG_TARGET_REG_BITS == 32) {
96
/*
97
* We don't support unaligned accesses on 32-bits.
98
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
99
if (TARGET_LONG_BITS == 32) {
100
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
101
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
102
- /* Zero-extend the address for use in the final address. */
103
- tcg_out_ext32u(s, TCG_REG_R4, addrlo);
104
- addrlo = TCG_REG_R4;
105
} else if (a_bits == 0) {
106
tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
107
} else {
108
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
109
tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
110
}
127
}
111
}
128
112
- h->index = addrlo;
129
if (z_mask == 0) {
113
130
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
114
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
131
115
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
132
static bool fold_bswap(OptContext *ctx, TCGOp *op)
116
+ /* Low part comparison into cr7. */
133
{
117
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
134
- uint64_t z_mask, sign;
118
0, 7, TCG_TYPE_I32);
135
+ uint64_t z_mask, s_mask, sign;
119
- tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
136
120
+
137
if (arg_is_const(op->args[1])) {
121
+ /* Load the high part TLB comparator into TMP2. */
138
uint64_t t = arg_info(op->args[1])->val;
122
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2, TCG_REG_TMP1,
139
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
123
+ cmp_off + 4 * !HOST_BIG_ENDIAN);
124
+
125
+ /* Load addend, deferred for this case. */
126
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
127
+ offsetof(CPUTLBEntry, addend));
128
+
129
+ /* High part comparison into cr6. */
130
+ tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_TMP2, 0, 6, TCG_TYPE_I32);
131
+
132
+ /* Combine comparisons into cr7. */
133
tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
134
} else {
135
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
136
+ /* Full comparison into cr7. */
137
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP2,
138
0, 7, TCG_TYPE_TL);
140
}
139
}
141
140
142
z_mask = arg_info(op->args[1])->z_mask;
141
/* Load a pointer into the current opcode w/conditional branch-link. */
142
ldst->label_ptr[0] = s->code_ptr;
143
tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
143
+
144
+
144
switch (op->opc) {
145
+ h->base = TCG_REG_TMP1;
145
case INDEX_op_bswap16_i32:
146
#else
146
case INDEX_op_bswap16_i64:
147
if (a_bits) {
147
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
148
ldst = new_ldst_label(s);
148
default:
149
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
149
g_assert_not_reached();
150
}
150
}
151
+ s_mask = smask_from_zmask(z_mask);
151
152
152
h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
153
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
153
- h->index = addrlo;
154
case TCG_BSWAP_OZ:
154
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
155
@@ -XXX,XX +XXX,XX @@ static bool fold_bswap(OptContext *ctx, TCGOp *op)
155
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
156
/* If the sign bit may be 1, force all the bits above to 1. */
156
- h->index = TCG_REG_TMP1;
157
if (z_mask & sign) {
158
z_mask |= sign;
159
+ s_mask = sign << 1;
160
}
161
break;
162
default:
163
/* The high bits are undefined: force all bits above the sign to 1. */
164
z_mask |= sign << 1;
165
+ s_mask = 0;
166
break;
167
}
168
ctx->z_mask = z_mask;
169
+ ctx->s_mask = s_mask;
170
171
return fold_masks(ctx, op);
172
}
173
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
174
static bool fold_extract(OptContext *ctx, TCGOp *op)
175
{
176
uint64_t z_mask_old, z_mask;
177
+ int pos = op->args[2];
178
+ int len = op->args[3];
179
180
if (arg_is_const(op->args[1])) {
181
uint64_t t;
182
183
t = arg_info(op->args[1])->val;
184
- t = extract64(t, op->args[2], op->args[3]);
185
+ t = extract64(t, pos, len);
186
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
187
}
188
189
z_mask_old = arg_info(op->args[1])->z_mask;
190
- z_mask = extract64(z_mask_old, op->args[2], op->args[3]);
191
- if (op->args[2] == 0) {
192
+ z_mask = extract64(z_mask_old, pos, len);
193
+ if (pos == 0) {
194
ctx->a_mask = z_mask_old ^ z_mask;
195
}
196
ctx->z_mask = z_mask;
197
+ ctx->s_mask = smask_from_zmask(z_mask);
198
199
return fold_masks(ctx, op);
200
}
201
@@ -XXX,XX +XXX,XX @@ static bool fold_extract2(OptContext *ctx, TCGOp *op)
202
203
static bool fold_exts(OptContext *ctx, TCGOp *op)
204
{
205
- uint64_t z_mask_old, z_mask, sign;
206
+ uint64_t s_mask_old, s_mask, z_mask, sign;
207
bool type_change = false;
208
209
if (fold_const1(ctx, op)) {
210
return true;
211
}
212
213
- z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
214
+ z_mask = arg_info(op->args[1])->z_mask;
215
+ s_mask = arg_info(op->args[1])->s_mask;
216
+ s_mask_old = s_mask;
217
218
switch (op->opc) {
219
CASE_OP_32_64(ext8s):
220
@@ -XXX,XX +XXX,XX @@ static bool fold_exts(OptContext *ctx, TCGOp *op)
221
222
if (z_mask & sign) {
223
z_mask |= sign;
224
- } else if (!type_change) {
225
- ctx->a_mask = z_mask_old ^ z_mask;
226
}
227
+ s_mask |= sign << 1;
228
+
229
ctx->z_mask = z_mask;
230
+ ctx->s_mask = s_mask;
231
+ if (!type_change) {
232
+ ctx->a_mask = s_mask & ~s_mask_old;
233
+ }
234
235
return fold_masks(ctx, op);
236
}
237
@@ -XXX,XX +XXX,XX @@ static bool fold_extu(OptContext *ctx, TCGOp *op)
238
}
239
240
ctx->z_mask = z_mask;
241
+ ctx->s_mask = smask_from_zmask(z_mask);
242
if (!type_change) {
243
ctx->a_mask = z_mask_old ^ z_mask;
244
}
245
@@ -XXX,XX +XXX,XX @@ static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
246
MemOp mop = get_memop(oi);
247
int width = 8 * memop_size(mop);
248
249
- if (!(mop & MO_SIGN) && width < 64) {
250
- ctx->z_mask = MAKE_64BIT_MASK(0, width);
251
+ if (width < 64) {
252
+ ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
253
+ if (!(mop & MO_SIGN)) {
254
+ ctx->z_mask = MAKE_64BIT_MASK(0, width);
255
+ ctx->s_mask <<= 1;
256
+ }
257
}
258
259
/* Opcodes that touch guest memory stop the mb optimization. */
260
@@ -XXX,XX +XXX,XX @@ static bool fold_setcond2(OptContext *ctx, TCGOp *op)
261
262
static bool fold_sextract(OptContext *ctx, TCGOp *op)
263
{
264
- int64_t z_mask_old, z_mask;
265
+ uint64_t z_mask, s_mask, s_mask_old;
266
+ int pos = op->args[2];
267
+ int len = op->args[3];
268
269
if (arg_is_const(op->args[1])) {
270
uint64_t t;
271
272
t = arg_info(op->args[1])->val;
273
- t = sextract64(t, op->args[2], op->args[3]);
274
+ t = sextract64(t, pos, len);
275
return tcg_opt_gen_movi(ctx, op, op->args[0], t);
276
}
277
278
- z_mask_old = arg_info(op->args[1])->z_mask;
279
- z_mask = sextract64(z_mask_old, op->args[2], op->args[3]);
280
- if (op->args[2] == 0 && z_mask >= 0) {
281
- ctx->a_mask = z_mask_old ^ z_mask;
282
- }
157
- }
283
+ z_mask = arg_info(op->args[1])->z_mask;
158
#endif
284
+ z_mask = sextract64(z_mask, pos, len);
159
285
ctx->z_mask = z_mask;
160
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
286
161
+ /* Zero-extend the guest address for use in the host address. */
287
+ s_mask_old = arg_info(op->args[1])->s_mask;
162
+ tcg_out_ext32u(s, TCG_REG_R0, addrlo);
288
+ s_mask = sextract64(s_mask_old, pos, len);
163
+ h->index = TCG_REG_R0;
289
+ s_mask |= MAKE_64BIT_MASK(len, 64 - len);
164
+ } else {
290
+ ctx->s_mask = s_mask;
165
+ h->index = addrlo;
291
+
292
+ if (pos == 0) {
293
+ ctx->a_mask = s_mask & ~s_mask_old;
294
+ }
166
+ }
295
+
167
+
296
return fold_masks(ctx, op);
168
return ldst;
297
}
169
}
298
170
299
@@ -XXX,XX +XXX,XX @@ static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
171
@@ -XXX,XX +XXX,XX @@ static void tcg_target_init(TCGContext *s)
300
{
172
#if defined(_CALL_SYSV) || TCG_TARGET_REG_BITS == 64
301
/* We can't do any folding with a load, but we can record bits. */
173
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
302
switch (op->opc) {
174
#endif
303
+ CASE_OP_32_64(ld8s):
175
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
304
+ ctx->s_mask = MAKE_64BIT_MASK(8, 56);
176
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
305
+ break;
177
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
306
CASE_OP_32_64(ld8u):
178
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP1);
307
ctx->z_mask = MAKE_64BIT_MASK(0, 8);
179
tcg_regset_set_reg(s->reserved_regs, TCG_VEC_TMP2);
308
+ ctx->s_mask = MAKE_64BIT_MASK(9, 55);
180
if (USE_REG_TB) {
309
+ break;
310
+ CASE_OP_32_64(ld16s):
311
+ ctx->s_mask = MAKE_64BIT_MASK(16, 48);
312
break;
313
CASE_OP_32_64(ld16u):
314
ctx->z_mask = MAKE_64BIT_MASK(0, 16);
315
+ ctx->s_mask = MAKE_64BIT_MASK(17, 47);
316
+ break;
317
+ case INDEX_op_ld32s_i64:
318
+ ctx->s_mask = MAKE_64BIT_MASK(32, 32);
319
break;
320
case INDEX_op_ld32u_i64:
321
ctx->z_mask = MAKE_64BIT_MASK(0, 32);
322
+ ctx->s_mask = MAKE_64BIT_MASK(33, 31);
323
break;
324
default:
325
g_assert_not_reached();
326
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
327
ctx.type = TCG_TYPE_I32;
328
}
329
330
- /* Assume all bits affected, and no bits known zero. */
331
+ /* Assume all bits affected, no bits known zero, no sign reps. */
332
ctx.a_mask = -1;
333
ctx.z_mask = -1;
334
+ ctx.s_mask = 0;
335
336
/*
337
* Process each opcode.
338
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
339
case INDEX_op_extrh_i64_i32:
340
done = fold_extu(&ctx, op);
341
break;
342
+ CASE_OP_32_64(ld8s):
343
CASE_OP_32_64(ld8u):
344
+ CASE_OP_32_64(ld16s):
345
CASE_OP_32_64(ld16u):
346
+ case INDEX_op_ld32s_i64:
347
case INDEX_op_ld32u_i64:
348
done = fold_tcg_ld(&ctx, op);
349
break;
350
--
181
--
351
2.25.1
182
2.34.1
352
183
353
184
diff view generated by jsdifflib
Break the final cleanup clause out of the main switch
statement. When fully folding an opcode to mov/movi,
use "continue" to process the next opcode, else break
to fall into the final cleanup.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---

The softmmu tlb uses TCG_REG_{TMP1,TMP2,R0}, not any of the normally
available registers. Now that we handle overlap between inputs and
helper arguments, we can allow any allocatable reg.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
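
A toy model of that control-flow shape (illustrative only, not QEMU code):
fully-folded cases use "continue" to go straight to the next opcode, while
every other case breaks out of the switch and falls into the shared cleanup.

#include <stdio.h>

enum { OP_FOLDED, OP_OTHER };

int main(void)
{
    int ops[] = { OP_OTHER, OP_FOLDED, OP_OTHER };
    int n = sizeof(ops) / sizeof(ops[0]);

    for (int i = 0; i < n; i++) {
        switch (ops[i]) {
        case OP_FOLDED:
            printf("op %d fully folded to mov/movi\n", i);
            continue;               /* next opcode, skip the cleanup */
        default:
            break;                  /* fall into the final cleanup */
        }
        printf("op %d: final cleanup clause\n", i);
    }
    return 0;
}
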
11
tcg/optimize.c | 190 ++++++++++++++++++++++++-------------------------
9
tcg/ppc/tcg-target-con-set.h | 11 ++++-------
12
1 file changed, 94 insertions(+), 96 deletions(-)
10
tcg/ppc/tcg-target-con-str.h | 2 --
11
tcg/ppc/tcg-target.c.inc | 32 ++++++++++----------------------
12
3 files changed, 14 insertions(+), 31 deletions(-)
13
13
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
15
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
16
--- a/tcg/ppc/tcg-target-con-set.h
17
+++ b/tcg/optimize.c
17
+++ b/tcg/ppc/tcg-target-con-set.h
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
18
@@ -XXX,XX +XXX,XX @@
19
switch (opc) {
19
C_O0_I1(r)
20
CASE_OP_32_64_VEC(mov):
20
C_O0_I2(r, r)
21
tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
21
C_O0_I2(r, ri)
22
- break;
22
-C_O0_I2(S, S)
23
+ continue;
23
C_O0_I2(v, r)
24
24
-C_O0_I3(S, S, S)
25
case INDEX_op_dup_vec:
25
+C_O0_I3(r, r, r)
26
if (arg_is_const(op->args[1])) {
26
C_O0_I4(r, r, ri, ri)
27
tmp = arg_info(op->args[1])->val;
27
-C_O0_I4(S, S, S, S)
28
tmp = dup_const(TCGOP_VECE(op), tmp);
28
-C_O1_I1(r, L)
29
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
29
+C_O0_I4(r, r, r, r)
30
- break;
30
C_O1_I1(r, r)
31
+ continue;
31
C_O1_I1(v, r)
32
}
32
C_O1_I1(v, v)
33
- goto do_default;
33
C_O1_I1(v, vr)
34
+ break;
34
C_O1_I2(r, 0, rZ)
35
35
-C_O1_I2(r, L, L)
36
case INDEX_op_dup2_vec:
36
C_O1_I2(r, rI, ri)
37
assert(TCG_TARGET_REG_BITS == 32);
37
C_O1_I2(r, rI, rT)
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
38
C_O1_I2(r, r, r)
39
tcg_opt_gen_movi(s, &ctx, op, op->args[0],
39
@@ -XXX,XX +XXX,XX @@ C_O1_I2(v, v, v)
40
deposit64(arg_info(op->args[1])->val, 32, 32,
40
C_O1_I3(v, v, v, v)
41
arg_info(op->args[2])->val));
41
C_O1_I4(r, r, ri, rZ, rZ)
42
- break;
42
C_O1_I4(r, r, r, ri, ri)
43
+ continue;
43
-C_O2_I1(L, L, L)
44
} else if (args_are_copies(op->args[1], op->args[2])) {
44
-C_O2_I2(L, L, L, L)
45
op->opc = INDEX_op_dup_vec;
45
+C_O2_I1(r, r, r)
46
TCGOP_VECE(op) = MO_32;
46
+C_O2_I2(r, r, r, r)
47
nb_iargs = 1;
47
C_O2_I4(r, r, rI, rZM, r, r)
48
}
48
C_O2_I4(r, r, r, r, rI, rZM)
49
- goto do_default;
49
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
50
+ break;
50
index XXXXXXX..XXXXXXX 100644
51
51
--- a/tcg/ppc/tcg-target-con-str.h
52
CASE_OP_32_64(not):
52
+++ b/tcg/ppc/tcg-target-con-str.h
53
CASE_OP_32_64(neg):
53
@@ -XXX,XX +XXX,XX @@ REGS('A', 1u << TCG_REG_R3)
54
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
54
REGS('B', 1u << TCG_REG_R4)
55
if (arg_is_const(op->args[1])) {
55
REGS('C', 1u << TCG_REG_R5)
56
tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
56
REGS('D', 1u << TCG_REG_R6)
57
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
57
-REGS('L', ALL_QLOAD_REGS)
58
- break;
58
-REGS('S', ALL_QSTORE_REGS)
59
+ continue;
59
60
}
60
/*
61
- goto do_default;
61
* Define constraint letters for constants:
62
+ break;
62
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
63
63
index XXXXXXX..XXXXXXX 100644
64
CASE_OP_32_64(bswap16):
64
--- a/tcg/ppc/tcg-target.c.inc
65
CASE_OP_32_64(bswap32):
65
+++ b/tcg/ppc/tcg-target.c.inc
66
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
66
@@ -XXX,XX +XXX,XX @@
67
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
67
#define ALL_GENERAL_REGS 0xffffffffu
68
op->args[2]);
68
#define ALL_VECTOR_REGS 0xffffffff00000000ull
69
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
69
70
- break;
70
-#ifdef CONFIG_SOFTMMU
71
+ continue;
71
-#define ALL_QLOAD_REGS \
72
}
72
- (ALL_GENERAL_REGS & \
73
- goto do_default;
73
- ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
74
+ break;
74
-#define ALL_QSTORE_REGS \
75
75
- (ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
76
CASE_OP_32_64(add):
76
- (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
77
CASE_OP_32_64(sub):
77
-#else
78
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
78
-#define ALL_QLOAD_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
79
tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
79
-#define ALL_QSTORE_REGS ALL_QLOAD_REGS
80
arg_info(op->args[2])->val);
80
-#endif
81
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
81
-
82
- break;
82
TCGPowerISA have_isa;
83
+ continue;
83
static bool have_isel;
84
}
84
bool have_altivec;
85
- goto do_default;
85
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
86
+ break;
86
87
87
case INDEX_op_qemu_ld_i32:
88
CASE_OP_32_64(clz):
88
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
89
CASE_OP_32_64(ctz):
89
- ? C_O1_I1(r, L)
90
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
90
- : C_O1_I2(r, L, L));
91
} else {
91
+ ? C_O1_I1(r, r)
92
tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
92
+ : C_O1_I2(r, r, r));
93
}
93
94
- break;
94
case INDEX_op_qemu_st_i32:
95
+ continue;
95
return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
96
}
96
- ? C_O0_I2(S, S)
97
- goto do_default;
97
- : C_O0_I3(S, S, S));
98
+ break;
98
+ ? C_O0_I2(r, r)
99
99
+ : C_O0_I3(r, r, r));
100
CASE_OP_32_64(deposit):
100
101
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
101
case INDEX_op_qemu_ld_i64:
102
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
102
- return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
103
op->args[3], op->args[4],
103
- : TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
104
arg_info(op->args[2])->val);
104
- : C_O2_I2(L, L, L, L));
105
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
105
+ return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
106
- break;
106
+ : TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
107
+ continue;
107
+ : C_O2_I2(r, r, r, r));
108
}
108
109
- goto do_default;
109
case INDEX_op_qemu_st_i64:
110
+ break;
110
- return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
111
111
- : TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
112
CASE_OP_32_64(extract):
112
- : C_O0_I4(S, S, S, S));
113
if (arg_is_const(op->args[1])) {
113
+ return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
114
tmp = extract64(arg_info(op->args[1])->val,
114
+ : TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
115
op->args[2], op->args[3]);
115
+ : C_O0_I4(r, r, r, r));
116
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
116
117
- break;
117
case INDEX_op_add_vec:
118
+ continue;
118
case INDEX_op_sub_vec:
119
}
120
- goto do_default;
121
+ break;
122
123
CASE_OP_32_64(sextract):
124
if (arg_is_const(op->args[1])) {
125
tmp = sextract64(arg_info(op->args[1])->val,
126
op->args[2], op->args[3]);
127
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
128
- break;
129
+ continue;
130
}
131
- goto do_default;
132
+ break;
133
134
CASE_OP_32_64(extract2):
135
if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
136
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
137
((uint32_t)v2 << (32 - shr)));
138
}
139
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
140
- break;
141
+ continue;
142
}
143
- goto do_default;
144
+ break;
145
146
CASE_OP_32_64(setcond):
147
tmp = do_constant_folding_cond(opc, op->args[1],
148
op->args[2], op->args[3]);
149
if (tmp != 2) {
150
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
151
- break;
152
+ continue;
153
}
154
- goto do_default;
155
+ break;
156
157
CASE_OP_32_64(brcond):
158
tmp = do_constant_folding_cond(opc, op->args[0],
159
op->args[1], op->args[2]);
160
- if (tmp != 2) {
161
- if (tmp) {
162
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
163
- op->opc = INDEX_op_br;
164
- op->args[0] = op->args[3];
165
- } else {
166
- tcg_op_remove(s, op);
167
- }
168
+ switch (tmp) {
169
+ case 0:
170
+ tcg_op_remove(s, op);
171
+ continue;
172
+ case 1:
173
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
174
+ op->opc = opc = INDEX_op_br;
175
+ op->args[0] = op->args[3];
176
break;
177
}
178
- goto do_default;
179
+ break;
180
181
CASE_OP_32_64(movcond):
182
tmp = do_constant_folding_cond(opc, op->args[1],
183
op->args[2], op->args[5]);
184
if (tmp != 2) {
185
tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
186
- break;
187
+ continue;
188
}
189
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
190
uint64_t tv = arg_info(op->args[3])->val;
191
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
192
if (fv == 1 && tv == 0) {
193
cond = tcg_invert_cond(cond);
194
} else if (!(tv == 1 && fv == 0)) {
195
- goto do_default;
196
+ break;
197
}
198
op->args[3] = cond;
199
op->opc = opc = (opc == INDEX_op_movcond_i32
200
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
201
: INDEX_op_setcond_i64);
202
nb_iargs = 2;
203
}
204
- goto do_default;
205
+ break;
206
207
case INDEX_op_add2_i32:
208
case INDEX_op_sub2_i32:
209
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
210
rh = op->args[1];
211
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)a);
212
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(a >> 32));
213
- break;
214
+ continue;
215
}
216
- goto do_default;
217
+ break;
218
219
case INDEX_op_mulu2_i32:
220
if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
221
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
222
rh = op->args[1];
223
tcg_opt_gen_movi(s, &ctx, op, rl, (int32_t)r);
224
tcg_opt_gen_movi(s, &ctx, op2, rh, (int32_t)(r >> 32));
225
- break;
226
+ continue;
227
}
228
- goto do_default;
229
+ break;
230
231
case INDEX_op_brcond2_i32:
232
tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
233
op->args[4]);
234
- if (tmp != 2) {
235
- if (tmp) {
236
- do_brcond_true:
237
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
238
- op->opc = INDEX_op_br;
239
- op->args[0] = op->args[5];
240
- } else {
241
+ if (tmp == 0) {
242
do_brcond_false:
243
- tcg_op_remove(s, op);
244
- }
245
- } else if ((op->args[4] == TCG_COND_LT
246
- || op->args[4] == TCG_COND_GE)
247
- && arg_is_const(op->args[2])
248
- && arg_info(op->args[2])->val == 0
249
- && arg_is_const(op->args[3])
250
- && arg_info(op->args[3])->val == 0) {
251
+ tcg_op_remove(s, op);
252
+ continue;
253
+ }
254
+ if (tmp == 1) {
255
+ do_brcond_true:
256
+ op->opc = opc = INDEX_op_br;
257
+ op->args[0] = op->args[5];
258
+ break;
259
+ }
260
+ if ((op->args[4] == TCG_COND_LT || op->args[4] == TCG_COND_GE)
261
+ && arg_is_const(op->args[2])
262
+ && arg_info(op->args[2])->val == 0
263
+ && arg_is_const(op->args[3])
264
+ && arg_info(op->args[3])->val == 0) {
265
/* Simplify LT/GE comparisons vs zero to a single compare
266
vs the high word of the input. */
267
do_brcond_high:
268
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
269
- op->opc = INDEX_op_brcond_i32;
270
+ op->opc = opc = INDEX_op_brcond_i32;
271
op->args[0] = op->args[1];
272
op->args[1] = op->args[3];
273
op->args[2] = op->args[4];
274
op->args[3] = op->args[5];
275
- } else if (op->args[4] == TCG_COND_EQ) {
276
+ break;
277
+ }
278
+ if (op->args[4] == TCG_COND_EQ) {
279
/* Simplify EQ comparisons where one of the pairs
280
can be simplified. */
281
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
282
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
283
if (tmp == 0) {
284
goto do_brcond_false;
285
} else if (tmp != 1) {
286
- goto do_default;
287
+ break;
288
}
289
do_brcond_low:
290
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
291
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
292
op->args[1] = op->args[2];
293
op->args[2] = op->args[4];
294
op->args[3] = op->args[5];
295
- } else if (op->args[4] == TCG_COND_NE) {
296
+ break;
297
+ }
298
+ if (op->args[4] == TCG_COND_NE) {
299
/* Simplify NE comparisons where one of the pairs
300
can be simplified. */
301
tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
302
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
303
} else if (tmp == 1) {
304
goto do_brcond_true;
305
}
306
- goto do_default;
307
- } else {
308
- goto do_default;
309
}
310
break;
311
312
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
313
if (tmp != 2) {
314
do_setcond_const:
315
tcg_opt_gen_movi(s, &ctx, op, op->args[0], tmp);
316
- } else if ((op->args[5] == TCG_COND_LT
317
- || op->args[5] == TCG_COND_GE)
318
- && arg_is_const(op->args[3])
319
- && arg_info(op->args[3])->val == 0
320
- && arg_is_const(op->args[4])
321
- && arg_info(op->args[4])->val == 0) {
322
+ continue;
323
+ }
324
+ if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
325
+ && arg_is_const(op->args[3])
326
+ && arg_info(op->args[3])->val == 0
327
+ && arg_is_const(op->args[4])
328
+ && arg_info(op->args[4])->val == 0) {
329
/* Simplify LT/GE comparisons vs zero to a single compare
330
vs the high word of the input. */
331
do_setcond_high:
332
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
333
op->args[1] = op->args[2];
334
op->args[2] = op->args[4];
335
op->args[3] = op->args[5];
336
- } else if (op->args[5] == TCG_COND_EQ) {
337
+ break;
338
+ }
339
+ if (op->args[5] == TCG_COND_EQ) {
340
/* Simplify EQ comparisons where one of the pairs
341
can be simplified. */
342
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
343
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
344
if (tmp == 0) {
345
goto do_setcond_high;
346
} else if (tmp != 1) {
347
- goto do_default;
348
+ break;
349
}
350
do_setcond_low:
351
reset_temp(op->args[0]);
352
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
353
op->opc = INDEX_op_setcond_i32;
354
op->args[2] = op->args[3];
355
op->args[3] = op->args[5];
356
- } else if (op->args[5] == TCG_COND_NE) {
357
+ break;
358
+ }
359
+ if (op->args[5] == TCG_COND_NE) {
360
/* Simplify NE comparisons where one of the pairs
361
can be simplified. */
362
tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
363
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
364
} else if (tmp == 1) {
365
goto do_setcond_const;
366
}
367
- goto do_default;
368
- } else {
369
- goto do_default;
370
}
371
break;
372
373
- case INDEX_op_call:
374
- if (!(tcg_call_flags(op)
375
+ default:
376
+ break;
377
+ }
378
+
379
+ /* Some of the folding above can change opc. */
380
+ opc = op->opc;
381
+ def = &tcg_op_defs[opc];
382
+ if (def->flags & TCG_OPF_BB_END) {
383
+ memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
384
+ } else {
385
+ if (opc == INDEX_op_call &&
386
+ !(tcg_call_flags(op)
387
& (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
388
for (i = 0; i < nb_globals; i++) {
389
if (test_bit(i, ctx.temps_used.l)) {
390
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
391
}
392
}
393
}
394
- goto do_reset_output;
395
396
- default:
397
- do_default:
398
- /* Default case: we know nothing about operation (or were unable
399
- to compute the operation result) so no propagation is done.
400
- We trash everything if the operation is the end of a basic
401
- block, otherwise we only trash the output args. "z_mask" is
402
- the non-zero bits mask for the first output arg. */
403
- if (def->flags & TCG_OPF_BB_END) {
404
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
405
- } else {
406
- do_reset_output:
407
- for (i = 0; i < nb_oargs; i++) {
408
- reset_temp(op->args[i]);
409
- /* Save the corresponding known-zero bits mask for the
410
- first output argument (only one supported so far). */
411
- if (i == 0) {
412
- arg_info(op->args[i])->z_mask = z_mask;
413
- }
414
+ for (i = 0; i < nb_oargs; i++) {
415
+ reset_temp(op->args[i]);
416
+ /* Save the corresponding known-zero bits mask for the
417
+ first output argument (only one supported so far). */
418
+ if (i == 0) {
419
+ arg_info(op->args[i])->z_mask = z_mask;
420
}
421
}
422
- break;
423
}
424
425
/* Eliminate duplicate and redundant fence instructions. */
426
--
119
--
427
2.25.1
120
2.34.1
428
121
429
122
diff view generated by jsdifflib
From: Luis Pires <luis.pires@eldorado.org.br>

These will be used to implement new decimal floating point
instructions from Power ISA 3.1.

The remainder is now returned directly by divu128/divs128,
freeing up phigh to receive the high 64 bits of the quotient.

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-4-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---

These constraints have not been used for quite some time.

Fixes: 77b73de67632 ("Use rem/div[u]_i32 drop div[u]2_i32")
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
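
For concreteness, a standalone reference model of the new contract, matching
the CONFIG_INT128 branch visible in the include/qemu/host-utils.h hunk below;
the _ref suffix and the test values are only for this sketch.

#include <assert.h>
#include <stdint.h>

/* 128-by-64 unsigned division: 128-bit quotient via *plow/*phigh,
 * remainder via the return value (requires compiler __int128 support). */
static uint64_t divu128_ref(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
{
    __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
    __uint128_t quotient = dividend / divisor;

    *plow = (uint64_t)quotient;
    *phigh = (uint64_t)(quotient >> 64);
    return (uint64_t)(dividend % divisor);
}

int main(void)
{
    uint64_t lo = 5, hi = 1;                 /* dividend = 2^64 + 5 */
    uint64_t rem = divu128_ref(&lo, &hi, 3);

    assert(hi == 0);                         /* quotient fits in 64 bits */
    assert(lo == 0x5555555555555557ull);     /* (2^64 + 5) / 3 */
    assert(rem == 0);                        /* 2^64 + 5 is a multiple of 3 */
    return 0;
}
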
14
include/hw/clock.h | 6 +-
9
tcg/ppc/tcg-target-con-str.h | 4 ----
15
include/qemu/host-utils.h | 20 ++++--
10
1 file changed, 4 deletions(-)
16
target/ppc/int_helper.c | 9 +--
17
util/host-utils.c | 133 +++++++++++++++++++++++++-------------
18
4 files changed, 108 insertions(+), 60 deletions(-)
19
11
20
diff --git a/include/hw/clock.h b/include/hw/clock.h
12
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
21
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
22
--- a/include/hw/clock.h
14
--- a/tcg/ppc/tcg-target-con-str.h
23
+++ b/include/hw/clock.h
15
+++ b/tcg/ppc/tcg-target-con-str.h
24
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
16
@@ -XXX,XX +XXX,XX @@
25
if (clk->period == 0) {
17
*/
26
return 0;
18
REGS('r', ALL_GENERAL_REGS)
27
}
19
REGS('v', ALL_VECTOR_REGS)
28
- /*
20
-REGS('A', 1u << TCG_REG_R3)
29
- * BUG: when CONFIG_INT128 is not defined, the current implementation of
21
-REGS('B', 1u << TCG_REG_R4)
30
- * divu128 does not return a valid truncated quotient, so the result will
22
-REGS('C', 1u << TCG_REG_R5)
31
- * be wrong.
23
-REGS('D', 1u << TCG_REG_R6)
32
- */
33
+
34
divu128(&lo, &hi, clk->period);
35
return lo;
36
}
37
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
38
index XXXXXXX..XXXXXXX 100644
39
--- a/include/qemu/host-utils.h
40
+++ b/include/qemu/host-utils.h
41
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
42
return (__int128_t)a * b / c;
43
}
44
45
-static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
46
+static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
47
+ uint64_t divisor)
48
{
49
__uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
50
__uint128_t result = dividend / divisor;
51
+
52
*plow = result;
53
- *phigh = dividend % divisor;
54
+ *phigh = result >> 64;
55
+ return dividend % divisor;
56
}
57
58
-static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
59
+static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
60
+ int64_t divisor)
61
{
62
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
63
+ __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
64
__int128_t result = dividend / divisor;
65
+
66
*plow = result;
67
- *phigh = dividend % divisor;
68
+ *phigh = result >> 64;
69
+ return dividend % divisor;
70
}
71
#else
72
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
73
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
74
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
75
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
76
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
77
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
78
79
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
80
{
81
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
82
index XXXXXXX..XXXXXXX 100644
83
--- a/target/ppc/int_helper.c
84
+++ b/target/ppc/int_helper.c
85
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
86
87
uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
88
{
89
- int64_t rt = 0;
90
+ uint64_t rt = 0;
91
int64_t ra = (int64_t)rau;
92
int64_t rb = (int64_t)rbu;
93
int overflow = 0;
94
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
95
int cr;
96
uint64_t lo_value;
97
uint64_t hi_value;
98
+ uint64_t rem;
99
ppc_avr_t ret = { .u64 = { 0, 0 } };
100
101
if (b->VsrSD(0) < 0) {
102
@@ -XXX,XX +XXX,XX @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
103
* In that case, we leave r unchanged.
104
*/
105
} else {
106
- divu128(&lo_value, &hi_value, 1000000000000000ULL);
107
+ rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
108
109
- for (i = 1; i < 16; hi_value /= 10, i++) {
110
- bcd_put_digit(&ret, hi_value % 10, i);
111
+ for (i = 1; i < 16; rem /= 10, i++) {
112
+ bcd_put_digit(&ret, rem % 10, i);
113
}
114
115
for (; i < 32; lo_value /= 10, i++) {
116
diff --git a/util/host-utils.c b/util/host-utils.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/util/host-utils.c
119
+++ b/util/host-utils.c
120
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
121
}
122
24
123
/*
25
/*
124
- * Unsigned 128-by-64 division. Returns quotient via plow and
26
* Define constraint letters for constants:
125
- * remainder via phigh.
126
- * The result must fit in 64 bits (plow) - otherwise, the result
127
- * is undefined.
128
- * This function will cause a division by zero if passed a zero divisor.
129
+ * Unsigned 128-by-64 division.
130
+ * Returns the remainder.
131
+ * Returns quotient via plow and phigh.
132
+ * Also returns the remainder via the function return value.
133
*/
134
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
135
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
136
{
137
uint64_t dhi = *phigh;
138
uint64_t dlo = *plow;
139
- unsigned i;
140
- uint64_t carry = 0;
141
+ uint64_t rem, dhighest;
142
+ int sh;
143
144
if (divisor == 0 || dhi == 0) {
145
*plow = dlo / divisor;
146
- *phigh = dlo % divisor;
147
+ *phigh = 0;
148
+ return dlo % divisor;
149
} else {
150
+ sh = clz64(divisor);
151
152
- for (i = 0; i < 64; i++) {
153
- carry = dhi >> 63;
154
- dhi = (dhi << 1) | (dlo >> 63);
155
- if (carry || (dhi >= divisor)) {
156
- dhi -= divisor;
157
- carry = 1;
158
- } else {
159
- carry = 0;
160
+ if (dhi < divisor) {
161
+ if (sh != 0) {
162
+ /* normalize the divisor, shifting the dividend accordingly */
163
+ divisor <<= sh;
164
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
165
+ dlo <<= sh;
166
}
167
- dlo = (dlo << 1) | carry;
168
+
169
+ *phigh = 0;
170
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
171
+ } else {
172
+ if (sh != 0) {
173
+ /* normalize the divisor, shifting the dividend accordingly */
174
+ divisor <<= sh;
175
+ dhighest = dhi >> (64 - sh);
176
+ dhi = (dhi << sh) | (dlo >> (64 - sh));
177
+ dlo <<= sh;
178
+
179
+ *phigh = udiv_qrnnd(&dhi, dhighest, dhi, divisor);
180
+ } else {
181
+ /**
182
+ * dhi >= divisor
183
+ * Since the MSB of divisor is set (sh == 0),
184
+ * (dhi - divisor) < divisor
185
+ *
186
+ * Thus, the high part of the quotient is 1, and we can
187
+ * calculate the low part with a single call to udiv_qrnnd
188
+ * after subtracting divisor from dhi
189
+ */
190
+ dhi -= divisor;
191
+ *phigh = 1;
192
+ }
193
+
194
+ *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
195
}
196
197
- *plow = dlo;
198
- *phigh = dhi;
199
+ /*
200
+ * since the dividend/divisor might have been normalized,
201
+ * the remainder might also have to be shifted back
202
+ */
203
+ return rem >> sh;
204
}
205
}
206
207
/*
208
- * Signed 128-by-64 division. Returns quotient via plow and
209
- * remainder via phigh.
210
- * The result must fit in 64 bits (plow) - otherwise, the result
211
- * is undefined.
212
- * This function will cause a division by zero if passed a zero divisor.
213
+ * Signed 128-by-64 division.
214
+ * Returns quotient via plow and phigh.
215
+ * Also returns the remainder via the function return value.
216
*/
217
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
218
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor)
219
{
220
- int sgn_dvdnd = *phigh < 0;
221
- int sgn_divsr = divisor < 0;
222
+ bool neg_quotient = false, neg_remainder = false;
223
+ uint64_t unsig_hi = *phigh, unsig_lo = *plow;
224
+ uint64_t rem;
225
226
- if (sgn_dvdnd) {
227
- *plow = ~(*plow);
228
- *phigh = ~(*phigh);
229
- if (*plow == (int64_t)-1) {
230
+ if (*phigh < 0) {
231
+ neg_quotient = !neg_quotient;
232
+ neg_remainder = !neg_remainder;
233
+
234
+ if (unsig_lo == 0) {
235
+ unsig_hi = -unsig_hi;
236
+ } else {
237
+ unsig_hi = ~unsig_hi;
238
+ unsig_lo = -unsig_lo;
239
+ }
240
+ }
241
+
242
+ if (divisor < 0) {
243
+ neg_quotient = !neg_quotient;
244
+
245
+ divisor = -divisor;
246
+ }
247
+
248
+ rem = divu128(&unsig_lo, &unsig_hi, (uint64_t)divisor);
249
+
250
+ if (neg_quotient) {
251
+ if (unsig_lo == 0) {
252
+ *phigh = -unsig_hi;
253
*plow = 0;
254
- (*phigh)++;
255
- } else {
256
- (*plow)++;
257
- }
258
+ } else {
259
+ *phigh = ~unsig_hi;
260
+ *plow = -unsig_lo;
261
+ }
262
+ } else {
263
+ *phigh = unsig_hi;
264
+ *plow = unsig_lo;
265
}
266
267
- if (sgn_divsr) {
268
- divisor = 0 - divisor;
269
- }
270
-
271
- divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
272
-
273
- if (sgn_dvdnd ^ sgn_divsr) {
274
- *plow = 0 - *plow;
275
+ if (neg_remainder) {
276
+ return -rem;
277
+ } else {
278
+ return rem;
279
}
280
}
281
#endif
282
--
27
--
283
2.25.1
28
2.34.1
284
29
285
30
diff view generated by jsdifflib
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---

Never used since its introduction.

Fixes: 3d582c6179c ("tcg-ppc64: Rearrange integer constant constraints")
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
5
tcg/optimize.c | 53 +++++++++++++++++++++++++++++---------------------
7
tcg/ppc/tcg-target-con-str.h | 1 -
6
1 file changed, 31 insertions(+), 22 deletions(-)
8
tcg/ppc/tcg-target.c.inc | 3 ---
9
2 files changed, 4 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/tcg/ppc/tcg-target-con-str.h
11
+++ b/tcg/optimize.c
14
+++ b/tcg/ppc/tcg-target-con-str.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_divide(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ REGS('v', ALL_VECTOR_REGS)
13
return fold_const2(ctx, op);
16
* CONST(letter, TCG_CT_CONST_* bit set)
14
}
17
*/
15
18
CONST('I', TCG_CT_CONST_S16)
16
+static bool fold_dup(OptContext *ctx, TCGOp *op)
19
-CONST('J', TCG_CT_CONST_U16)
17
+{
20
CONST('M', TCG_CT_CONST_MONE)
18
+ if (arg_is_const(op->args[1])) {
21
CONST('T', TCG_CT_CONST_S32)
19
+ uint64_t t = arg_info(op->args[1])->val;
22
CONST('U', TCG_CT_CONST_U32)
20
+ t = dup_const(TCGOP_VECE(op), t);
23
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
21
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
24
index XXXXXXX..XXXXXXX 100644
22
+ }
25
--- a/tcg/ppc/tcg-target.c.inc
23
+ return false;
26
+++ b/tcg/ppc/tcg-target.c.inc
24
+}
27
@@ -XXX,XX +XXX,XX @@
25
+
28
#define SZR (TCG_TARGET_REG_BITS / 8)
26
+static bool fold_dup2(OptContext *ctx, TCGOp *op)
29
27
+{
30
#define TCG_CT_CONST_S16 0x100
28
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
31
-#define TCG_CT_CONST_U16 0x200
29
+ uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
32
#define TCG_CT_CONST_S32 0x400
30
+ arg_info(op->args[2])->val);
33
#define TCG_CT_CONST_U32 0x800
31
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
34
#define TCG_CT_CONST_ZERO 0x1000
32
+ }
35
@@ -XXX,XX +XXX,XX @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
33
+
36
34
+ if (args_are_copies(op->args[1], op->args[2])) {
37
if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
35
+ op->opc = INDEX_op_dup_vec;
38
return 1;
36
+ TCGOP_VECE(op) = MO_32;
39
- } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
37
+ }
40
- return 1;
38
+ return false;
41
} else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
39
+}
42
return 1;
40
+
43
} else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
41
static bool fold_eqv(OptContext *ctx, TCGOp *op)
42
{
43
return fold_const2(ctx, op);
44
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
45
done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
46
break;
47
48
- case INDEX_op_dup_vec:
49
- if (arg_is_const(op->args[1])) {
50
- tmp = arg_info(op->args[1])->val;
51
- tmp = dup_const(TCGOP_VECE(op), tmp);
52
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
53
- continue;
54
- }
55
- break;
56
-
57
- case INDEX_op_dup2_vec:
58
- assert(TCG_TARGET_REG_BITS == 32);
59
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
60
- tcg_opt_gen_movi(&ctx, op, op->args[0],
61
- deposit64(arg_info(op->args[1])->val, 32, 32,
62
- arg_info(op->args[2])->val));
63
- continue;
64
- } else if (args_are_copies(op->args[1], op->args[2])) {
65
- op->opc = INDEX_op_dup_vec;
66
- TCGOP_VECE(op) = MO_32;
67
- }
68
- break;
69
-
70
default:
71
break;
72
73
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
74
CASE_OP_32_64(divu):
75
done = fold_divide(&ctx, op);
76
break;
77
+ case INDEX_op_dup_vec:
78
+ done = fold_dup(&ctx, op);
79
+ break;
80
+ case INDEX_op_dup2_vec:
81
+ done = fold_dup2(&ctx, op);
82
+ break;
83
CASE_OP_32_64(eqv):
84
done = fold_eqv(&ctx, op);
85
break;
86
--
44
--
87
2.25.1
45
2.34.1
88
46
89
47
diff view generated by jsdifflib
The results are generally 6-bit unsigned values, though
the count leading and trailing bits may produce any value
for a zero input.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---

The softmmu tlb uses TCG_REG_TMP[0-2], not any of the normally available
registers. Now that we handle overlap between inputs and helper arguments,
we can allow any allocatable reg.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
9
tcg/optimize.c | 3 ++-
9
tcg/riscv/tcg-target-con-set.h | 2 --
10
1 file changed, 2 insertions(+), 1 deletion(-)
10
tcg/riscv/tcg-target-con-str.h | 1 -
11
tcg/riscv/tcg-target.c.inc | 16 +++-------------
12
3 files changed, 3 insertions(+), 16 deletions(-)
11
13
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
14
diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
13
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
16
--- a/tcg/riscv/tcg-target-con-set.h
15
+++ b/tcg/optimize.c
17
+++ b/tcg/riscv/tcg-target-con-set.h
16
@@ -XXX,XX +XXX,XX @@ static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
18
@@ -XXX,XX +XXX,XX @@
17
g_assert_not_reached();
19
* tcg-target-con-str.h; the constraint combination is inclusive or.
18
}
20
*/
19
ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
21
C_O0_I1(r)
20
-
22
-C_O0_I2(LZ, L)
21
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
23
C_O0_I2(rZ, r)
22
return false;
24
C_O0_I2(rZ, rZ)
23
}
25
-C_O1_I1(r, L)
24
26
C_O1_I1(r, r)
25
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
27
C_O1_I2(r, r, ri)
28
C_O1_I2(r, r, rI)
29
diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/tcg/riscv/tcg-target-con-str.h
32
+++ b/tcg/riscv/tcg-target-con-str.h
33
@@ -XXX,XX +XXX,XX @@
34
* REGS(letter, register_mask)
35
*/
36
REGS('r', ALL_GENERAL_REGS)
37
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
38
39
/*
40
* Define constraint letters for constants:
41
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
42
index XXXXXXX..XXXXXXX 100644
43
--- a/tcg/riscv/tcg-target.c.inc
44
+++ b/tcg/riscv/tcg-target.c.inc
45
@@ -XXX,XX +XXX,XX @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
46
#define TCG_CT_CONST_N12 0x400
47
#define TCG_CT_CONST_M12 0x800
48
49
-#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
50
-/*
51
- * For softmmu, we need to avoid conflicts with the first 5
52
- * argument registers to call the helper. Some of these are
53
- * also used for the tlb lookup.
54
- */
55
-#ifdef CONFIG_SOFTMMU
56
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_A0, 5)
57
-#else
58
-#define SOFTMMU_RESERVE_REGS 0
59
-#endif
60
+#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32)
61
62
#define sextreg sextract64
63
64
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
65
66
case INDEX_op_qemu_ld_i32:
67
case INDEX_op_qemu_ld_i64:
68
- return C_O1_I1(r, L);
69
+ return C_O1_I1(r, r);
70
case INDEX_op_qemu_st_i32:
71
case INDEX_op_qemu_st_i64:
72
- return C_O0_I2(LZ, L);
73
+ return C_O0_I2(rZ, r);
74
26
default:
75
default:
27
g_assert_not_reached();
76
g_assert_not_reached();
28
}
29
+ ctx->s_mask = smask_from_zmask(ctx->z_mask);
30
return false;
31
}
32
33
--
77
--
34
2.25.1
78
2.34.1
35
79
36
80
diff view generated by jsdifflib
Rather than try to keep these up-to-date across folding,
re-read nb_oargs at the end, after re-reading the opcode.

A couple of asserts need dropping, but that will take care
of itself as we split the function further.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---

Rather than zero-extend the guest address into a register,
use an add instruction which zero-extends the second input.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
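
A one-line C model of that trick (illustrative, names are mine): with an add
that zero-extends its 32-bit second operand (ALGFR on s390x), the host address
is formed without a separate ext32u or a scratch register.

#include <assert.h>
#include <stdint.h>

/* host address = TLB addend + zero-extended 32-bit guest address */
static uint64_t host_addr(uint64_t tlb_addend, uint32_t guest_addr)
{
    return tlb_addend + (uint64_t)guest_addr;   /* a single ALGFR */
}

int main(void)
{
    /* The zero-extension is folded into the add itself. */
    assert(host_addr(0x100000000ull, 0xfffff000u) == 0x1fffff000ull);
    return 0;
}
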
11
tcg/optimize.c | 14 ++++----------
7
tcg/s390x/tcg-target.c.inc | 8 +++++---
12
1 file changed, 4 insertions(+), 10 deletions(-)
8
1 file changed, 5 insertions(+), 3 deletions(-)
13
9
14
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
15
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
16
--- a/tcg/optimize.c
12
--- a/tcg/s390x/tcg-target.c.inc
17
+++ b/tcg/optimize.c
13
+++ b/tcg/s390x/tcg-target.c.inc
18
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
14
@@ -XXX,XX +XXX,XX @@ typedef enum S390Opcode {
19
15
RRE_ALGR = 0xb90a,
20
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
16
RRE_ALCR = 0xb998,
21
uint64_t z_mask, partmask, affected, tmp;
17
RRE_ALCGR = 0xb988,
22
- int nb_oargs, nb_iargs;
18
+ RRE_ALGFR = 0xb91a,
23
TCGOpcode opc = op->opc;
19
RRE_CGR = 0xb920,
24
const TCGOpDef *def;
20
RRE_CLGR = 0xb921,
25
21
RRE_DLGR = 0xb987,
26
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
22
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
27
}
23
tcg_out_insn(s, RXY, LG, h->index, TCG_REG_R2, TCG_REG_NONE,
28
24
offsetof(CPUTLBEntry, addend));
29
def = &tcg_op_defs[opc];
25
30
- nb_oargs = def->nb_oargs;
26
- h->base = addr_reg;
31
- nb_iargs = def->nb_iargs;
27
if (TARGET_LONG_BITS == 32) {
32
- init_arguments(&ctx, op, nb_oargs + nb_iargs);
28
- tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
33
- copy_propagate(&ctx, op, nb_oargs, nb_iargs);
29
- h->base = TCG_REG_R3;
34
+ init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
30
+ tcg_out_insn(s, RRE, ALGFR, h->index, addr_reg);
35
+ copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
31
+ h->base = TCG_REG_NONE;
36
32
+ } else {
37
/* For commutative operations make constant second argument */
33
+ h->base = addr_reg;
38
switch (opc) {
34
}
39
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
35
h->disp = 0;
40
36
#else
41
CASE_OP_32_64(qemu_ld):
42
{
43
- MemOpIdx oi = op->args[nb_oargs + nb_iargs];
44
+ MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
45
MemOp mop = get_memop(oi);
46
if (!(mop & MO_SIGN)) {
47
z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
}
50
51
if (partmask == 0) {
52
- tcg_debug_assert(nb_oargs == 1);
53
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
54
continue;
55
}
56
if (affected == 0) {
57
- tcg_debug_assert(nb_oargs == 1);
58
tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
59
continue;
60
}
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
} else if (args_are_copies(op->args[1], op->args[2])) {
63
op->opc = INDEX_op_dup_vec;
64
TCGOP_VECE(op) = MO_32;
65
- nb_iargs = 1;
66
}
67
break;
68
69
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
70
op->opc = opc = (opc == INDEX_op_movcond_i32
71
? INDEX_op_setcond_i32
72
: INDEX_op_setcond_i64);
73
- nb_iargs = 2;
74
}
75
break;
76
77
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
78
if (def->flags & TCG_OPF_BB_END) {
79
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
80
} else {
81
+ int nb_oargs = def->nb_oargs;
82
for (i = 0; i < nb_oargs; i++) {
83
reset_temp(op->args[i]);
84
/* Save the corresponding known-zero bits mask for the
85
--
37
--
86
2.25.1
38
2.34.1
87
39
88
40
diff view generated by jsdifflib
Prepare for tracking different masks by renaming this one.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---

Adjust the softmmu tlb to use R0+R1, not any of the normally available
registers. Since we handle overlap between inputs and helper arguments,
we can allow any allocatable reg.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
8
tcg/optimize.c | 142 +++++++++++++++++++++++++------------------------
8
tcg/s390x/tcg-target-con-set.h | 2 --
9
1 file changed, 72 insertions(+), 70 deletions(-)
9
tcg/s390x/tcg-target-con-str.h | 1 -
10
tcg/s390x/tcg-target.c.inc | 36 ++++++++++++----------------------
11
3 files changed, 12 insertions(+), 27 deletions(-)
10
12
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
12
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
15
--- a/tcg/s390x/tcg-target-con-set.h
14
+++ b/tcg/optimize.c
16
+++ b/tcg/s390x/tcg-target-con-set.h
15
@@ -XXX,XX +XXX,XX @@ typedef struct TempOptInfo {
17
@@ -XXX,XX +XXX,XX @@
16
TCGTemp *prev_copy;
18
* tcg-target-con-str.h; the constraint combination is inclusive or.
17
TCGTemp *next_copy;
19
*/
18
uint64_t val;
20
C_O0_I1(r)
19
- uint64_t mask;
21
-C_O0_I2(L, L)
20
+ uint64_t z_mask; /* mask bit is 0 if and only if value bit is 0 */
22
C_O0_I2(r, r)
21
} TempOptInfo;
23
C_O0_I2(r, ri)
22
24
C_O0_I2(r, rA)
23
static inline TempOptInfo *ts_info(TCGTemp *ts)
25
C_O0_I2(v, r)
24
@@ -XXX,XX +XXX,XX @@ static void reset_ts(TCGTemp *ts)
26
-C_O1_I1(r, L)
25
ti->next_copy = ts;
27
C_O1_I1(r, r)
26
ti->prev_copy = ts;
28
C_O1_I1(v, r)
27
ti->is_const = false;
29
C_O1_I1(v, v)
28
- ti->mask = -1;
30
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
29
+ ti->z_mask = -1;
31
index XXXXXXX..XXXXXXX 100644
30
}
32
--- a/tcg/s390x/tcg-target-con-str.h
31
33
+++ b/tcg/s390x/tcg-target-con-str.h
32
static void reset_temp(TCGArg arg)
34
@@ -XXX,XX +XXX,XX @@
33
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(TCGTempSet *temps_used, TCGTemp *ts)
35
* REGS(letter, register_mask)
34
if (ts->kind == TEMP_CONST) {
36
*/
35
ti->is_const = true;
37
REGS('r', ALL_GENERAL_REGS)
36
ti->val = ts->val;
38
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
37
- ti->mask = ts->val;
39
REGS('v', ALL_VECTOR_REGS)
38
+ ti->z_mask = ts->val;
40
REGS('o', 0xaaaa) /* odd numbered general regs */
39
if (TCG_TARGET_REG_BITS > 32 && ts->type == TCG_TYPE_I32) {
41
40
/* High bits of a 32-bit quantity are garbage. */
42
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
41
- ti->mask |= ~0xffffffffull;
43
index XXXXXXX..XXXXXXX 100644
42
+ ti->z_mask |= ~0xffffffffull;
44
--- a/tcg/s390x/tcg-target.c.inc
43
}
45
+++ b/tcg/s390x/tcg-target.c.inc
46
@@ -XXX,XX +XXX,XX @@
47
#define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
48
#define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
49
50
-/*
51
- * For softmmu, we need to avoid conflicts with the first 3
52
- * argument registers to perform the tlb lookup, and to call
53
- * the helper function.
54
- */
55
-#ifdef CONFIG_SOFTMMU
56
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
57
-#else
58
-#define SOFTMMU_RESERVE_REGS 0
59
-#endif
60
-
61
-
62
/* Several places within the instruction set 0 means "no register"
63
rather than TCG_REG_R0. */
64
#define TCG_REG_NONE 0
65
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
66
ldst->oi = oi;
67
ldst->addrlo_reg = addr_reg;
68
69
- tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
70
+ tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
71
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
72
73
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
74
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
75
- tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
76
- tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
77
+ tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
78
+ tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
79
80
/*
81
* For aligned accesses, we check the first byte and include the alignment
82
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
83
a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
84
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
85
if (a_off == 0) {
86
- tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
87
+ tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
44
} else {
88
} else {
45
ti->is_const = false;
89
- tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
46
- ti->mask = -1;
90
- tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
47
+ ti->z_mask = -1;
91
+ tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
92
+ tgen_andi(s, TCG_TYPE_TL, TCG_REG_R0, tlb_mask);
48
}
93
}
49
}
94
50
95
if (is_ld) {
51
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
96
@@ -XXX,XX +XXX,XX @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
52
const TCGOpDef *def;
97
ofs = offsetof(CPUTLBEntry, addr_write);
53
TempOptInfo *di;
54
TempOptInfo *si;
55
- uint64_t mask;
56
+ uint64_t z_mask;
57
TCGOpcode new_op;
58
59
if (ts_are_copies(dst_ts, src_ts)) {
60
@@ -XXX,XX +XXX,XX @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
61
op->args[0] = dst;
62
op->args[1] = src;
63
64
- mask = si->mask;
65
+ z_mask = si->z_mask;
66
if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
67
/* High bits of the destination are now garbage. */
68
- mask |= ~0xffffffffull;
69
+ z_mask |= ~0xffffffffull;
70
}
98
}
71
- di->mask = mask;
99
if (TARGET_LONG_BITS == 32) {
72
+ di->z_mask = z_mask;
100
- tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
73
101
+ tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
74
if (src_ts->type == dst_ts->type) {
102
} else {
75
TempOptInfo *ni = ts_info(si->next_copy);
103
- tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
76
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
104
+ tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
77
}
105
}
78
106
79
QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
107
tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
80
- uint64_t mask, partmask, affected, tmp;
108
ldst->label_ptr[0] = s->code_ptr++;
81
+ uint64_t z_mask, partmask, affected, tmp;
109
82
int nb_oargs, nb_iargs;
110
- h->index = TCG_REG_R2;
83
TCGOpcode opc = op->opc;
111
- tcg_out_insn(s, RXY, LG, h->index, TCG_REG_R2, TCG_REG_NONE,
84
const TCGOpDef *def = &tcg_op_defs[opc];
112
+ h->index = TCG_TMP0;
85
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
113
+ tcg_out_insn(s, RXY, LG, h->index, TCG_TMP0, TCG_REG_NONE,
86
114
offsetof(CPUTLBEntry, addend));
87
/* Simplify using known-zero bits. Currently only ops with a single
115
88
output argument is supported. */
116
if (TARGET_LONG_BITS == 32) {
89
- mask = -1;
117
@@ -XXX,XX +XXX,XX @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
90
+ z_mask = -1;
118
91
affected = -1;
119
case INDEX_op_qemu_ld_i32:
92
switch (opc) {
120
case INDEX_op_qemu_ld_i64:
93
CASE_OP_32_64(ext8s):
121
- return C_O1_I1(r, L);
94
- if ((arg_info(op->args[1])->mask & 0x80) != 0) {
122
+ return C_O1_I1(r, r);
95
+ if ((arg_info(op->args[1])->z_mask & 0x80) != 0) {
123
case INDEX_op_qemu_st_i64:
96
break;
124
case INDEX_op_qemu_st_i32:
97
}
125
- return C_O0_I2(L, L);
98
QEMU_FALLTHROUGH;
126
+ return C_O0_I2(r, r);
99
CASE_OP_32_64(ext8u):
127
100
- mask = 0xff;
128
case INDEX_op_deposit_i32:
101
+ z_mask = 0xff;
129
case INDEX_op_deposit_i64:
102
goto and_const;
103
CASE_OP_32_64(ext16s):
104
- if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
105
+ if ((arg_info(op->args[1])->z_mask & 0x8000) != 0) {
106
break;
107
}
108
QEMU_FALLTHROUGH;
109
CASE_OP_32_64(ext16u):
110
- mask = 0xffff;
111
+ z_mask = 0xffff;
112
goto and_const;
113
case INDEX_op_ext32s_i64:
114
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
115
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
116
break;
117
}
118
QEMU_FALLTHROUGH;
119
case INDEX_op_ext32u_i64:
120
- mask = 0xffffffffU;
121
+ z_mask = 0xffffffffU;
122
goto and_const;
123
124
CASE_OP_32_64(and):
125
- mask = arg_info(op->args[2])->mask;
126
+ z_mask = arg_info(op->args[2])->z_mask;
127
if (arg_is_const(op->args[2])) {
128
and_const:
129
- affected = arg_info(op->args[1])->mask & ~mask;
130
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
131
}
132
- mask = arg_info(op->args[1])->mask & mask;
133
+ z_mask = arg_info(op->args[1])->z_mask & z_mask;
134
break;
135
136
case INDEX_op_ext_i32_i64:
137
- if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
138
+ if ((arg_info(op->args[1])->z_mask & 0x80000000) != 0) {
139
break;
140
}
141
QEMU_FALLTHROUGH;
142
case INDEX_op_extu_i32_i64:
143
/* We do not compute affected as it is a size changing op. */
144
- mask = (uint32_t)arg_info(op->args[1])->mask;
145
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
146
break;
147
148
CASE_OP_32_64(andc):
149
/* Known-zeros does not imply known-ones. Therefore unless
150
op->args[2] is constant, we can't infer anything from it. */
151
if (arg_is_const(op->args[2])) {
152
- mask = ~arg_info(op->args[2])->mask;
153
+ z_mask = ~arg_info(op->args[2])->z_mask;
154
goto and_const;
155
}
156
/* But we certainly know nothing outside args[1] may be set. */
157
- mask = arg_info(op->args[1])->mask;
158
+ z_mask = arg_info(op->args[1])->z_mask;
159
break;
160
161
case INDEX_op_sar_i32:
162
if (arg_is_const(op->args[2])) {
163
tmp = arg_info(op->args[2])->val & 31;
164
- mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
165
+ z_mask = (int32_t)arg_info(op->args[1])->z_mask >> tmp;
166
}
167
break;
168
case INDEX_op_sar_i64:
169
if (arg_is_const(op->args[2])) {
170
tmp = arg_info(op->args[2])->val & 63;
171
- mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
172
+ z_mask = (int64_t)arg_info(op->args[1])->z_mask >> tmp;
173
}
174
break;
175
176
case INDEX_op_shr_i32:
177
if (arg_is_const(op->args[2])) {
178
tmp = arg_info(op->args[2])->val & 31;
179
- mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
180
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask >> tmp;
181
}
182
break;
183
case INDEX_op_shr_i64:
184
if (arg_is_const(op->args[2])) {
185
tmp = arg_info(op->args[2])->val & 63;
186
- mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
187
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> tmp;
188
}
189
break;
190
191
case INDEX_op_extrl_i64_i32:
192
- mask = (uint32_t)arg_info(op->args[1])->mask;
193
+ z_mask = (uint32_t)arg_info(op->args[1])->z_mask;
194
break;
195
case INDEX_op_extrh_i64_i32:
196
- mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
197
+ z_mask = (uint64_t)arg_info(op->args[1])->z_mask >> 32;
198
break;
199
200
CASE_OP_32_64(shl):
201
if (arg_is_const(op->args[2])) {
202
tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
203
- mask = arg_info(op->args[1])->mask << tmp;
204
+ z_mask = arg_info(op->args[1])->z_mask << tmp;
205
}
206
break;
207
208
CASE_OP_32_64(neg):
209
/* Set to 1 all bits to the left of the rightmost. */
210
- mask = -(arg_info(op->args[1])->mask
211
- & -arg_info(op->args[1])->mask);
212
+ z_mask = -(arg_info(op->args[1])->z_mask
213
+ & -arg_info(op->args[1])->z_mask);
214
break;
215
216
CASE_OP_32_64(deposit):
217
- mask = deposit64(arg_info(op->args[1])->mask,
218
- op->args[3], op->args[4],
219
- arg_info(op->args[2])->mask);
220
+ z_mask = deposit64(arg_info(op->args[1])->z_mask,
221
+ op->args[3], op->args[4],
222
+ arg_info(op->args[2])->z_mask);
223
break;
224
225
CASE_OP_32_64(extract):
226
- mask = extract64(arg_info(op->args[1])->mask,
227
- op->args[2], op->args[3]);
228
+ z_mask = extract64(arg_info(op->args[1])->z_mask,
229
+ op->args[2], op->args[3]);
230
if (op->args[2] == 0) {
231
- affected = arg_info(op->args[1])->mask & ~mask;
232
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
233
}
234
break;
235
CASE_OP_32_64(sextract):
236
- mask = sextract64(arg_info(op->args[1])->mask,
237
- op->args[2], op->args[3]);
238
- if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
239
- affected = arg_info(op->args[1])->mask & ~mask;
240
+ z_mask = sextract64(arg_info(op->args[1])->z_mask,
241
+ op->args[2], op->args[3]);
242
+ if (op->args[2] == 0 && (tcg_target_long)z_mask >= 0) {
243
+ affected = arg_info(op->args[1])->z_mask & ~z_mask;
244
}
245
break;
246
247
CASE_OP_32_64(or):
248
CASE_OP_32_64(xor):
249
- mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
250
+ z_mask = arg_info(op->args[1])->z_mask
251
+ | arg_info(op->args[2])->z_mask;
252
break;
253
254
case INDEX_op_clz_i32:
255
case INDEX_op_ctz_i32:
256
- mask = arg_info(op->args[2])->mask | 31;
257
+ z_mask = arg_info(op->args[2])->z_mask | 31;
258
break;
259
260
case INDEX_op_clz_i64:
261
case INDEX_op_ctz_i64:
262
- mask = arg_info(op->args[2])->mask | 63;
263
+ z_mask = arg_info(op->args[2])->z_mask | 63;
264
break;
265
266
case INDEX_op_ctpop_i32:
267
- mask = 32 | 31;
268
+ z_mask = 32 | 31;
269
break;
270
case INDEX_op_ctpop_i64:
271
- mask = 64 | 63;
272
+ z_mask = 64 | 63;
273
break;
274
275
CASE_OP_32_64(setcond):
276
case INDEX_op_setcond2_i32:
277
- mask = 1;
278
+ z_mask = 1;
279
break;
280
281
CASE_OP_32_64(movcond):
282
- mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
283
+ z_mask = arg_info(op->args[3])->z_mask
284
+ | arg_info(op->args[4])->z_mask;
285
break;
286
287
CASE_OP_32_64(ld8u):
288
- mask = 0xff;
289
+ z_mask = 0xff;
290
break;
291
CASE_OP_32_64(ld16u):
292
- mask = 0xffff;
293
+ z_mask = 0xffff;
294
break;
295
case INDEX_op_ld32u_i64:
296
- mask = 0xffffffffu;
297
+ z_mask = 0xffffffffu;
298
break;
299
300
CASE_OP_32_64(qemu_ld):
301
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
302
MemOpIdx oi = op->args[nb_oargs + nb_iargs];
303
MemOp mop = get_memop(oi);
304
if (!(mop & MO_SIGN)) {
305
- mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
306
+ z_mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
307
}
308
}
309
break;
310
311
CASE_OP_32_64(bswap16):
312
- mask = arg_info(op->args[1])->mask;
313
- if (mask <= 0xffff) {
314
+ z_mask = arg_info(op->args[1])->z_mask;
315
+ if (z_mask <= 0xffff) {
316
op->args[2] |= TCG_BSWAP_IZ;
317
}
318
- mask = bswap16(mask);
319
+ z_mask = bswap16(z_mask);
320
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
321
case TCG_BSWAP_OZ:
322
break;
323
case TCG_BSWAP_OS:
324
- mask = (int16_t)mask;
325
+ z_mask = (int16_t)z_mask;
326
break;
327
default: /* undefined high bits */
328
- mask |= MAKE_64BIT_MASK(16, 48);
329
+ z_mask |= MAKE_64BIT_MASK(16, 48);
330
break;
331
}
332
break;
333
334
case INDEX_op_bswap32_i64:
335
- mask = arg_info(op->args[1])->mask;
336
- if (mask <= 0xffffffffu) {
337
+ z_mask = arg_info(op->args[1])->z_mask;
338
+ if (z_mask <= 0xffffffffu) {
339
op->args[2] |= TCG_BSWAP_IZ;
340
}
341
- mask = bswap32(mask);
342
+ z_mask = bswap32(z_mask);
343
switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
344
case TCG_BSWAP_OZ:
345
break;
346
case TCG_BSWAP_OS:
347
- mask = (int32_t)mask;
348
+ z_mask = (int32_t)z_mask;
349
break;
350
default: /* undefined high bits */
351
- mask |= MAKE_64BIT_MASK(32, 32);
352
+ z_mask |= MAKE_64BIT_MASK(32, 32);
353
break;
354
}
355
break;
356
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
357
/* 32-bit ops generate 32-bit results. For the result is zero test
358
below, we can ignore high bits, but for further optimizations we
359
need to record that the high bits contain garbage. */
360
- partmask = mask;
361
+ partmask = z_mask;
362
if (!(def->flags & TCG_OPF_64BIT)) {
363
- mask |= ~(tcg_target_ulong)0xffffffffu;
364
+ z_mask |= ~(tcg_target_ulong)0xffffffffu;
365
partmask &= 0xffffffffu;
366
affected &= 0xffffffffu;
367
}
368
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
369
vs the high word of the input. */
370
do_setcond_high:
371
reset_temp(op->args[0]);
372
- arg_info(op->args[0])->mask = 1;
373
+ arg_info(op->args[0])->z_mask = 1;
374
op->opc = INDEX_op_setcond_i32;
375
op->args[1] = op->args[2];
376
op->args[2] = op->args[4];
377
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
378
}
379
do_setcond_low:
380
reset_temp(op->args[0]);
381
- arg_info(op->args[0])->mask = 1;
382
+ arg_info(op->args[0])->z_mask = 1;
383
op->opc = INDEX_op_setcond_i32;
384
op->args[2] = op->args[3];
385
op->args[3] = op->args[5];
386
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
387
/* Default case: we know nothing about operation (or were unable
388
to compute the operation result) so no propagation is done.
389
We trash everything if the operation is the end of a basic
390
- block, otherwise we only trash the output args. "mask" is
391
+ block, otherwise we only trash the output args. "z_mask" is
392
the non-zero bits mask for the first output arg. */
393
if (def->flags & TCG_OPF_BB_END) {
394
memset(&temps_used, 0, sizeof(temps_used));
395
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
396
/* Save the corresponding known-zero bits mask for the
397
first output argument (only one supported so far). */
398
if (i == 0) {
399
- arg_info(op->args[i])->mask = mask;
400
+ arg_info(op->args[i])->z_mask = z_mask;
401
}
402
}
403
}
404
--
130
--
405
2.25.1
131
2.34.1
406
132
407
133
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
These are atomic operations, so mark them as requiring alignment.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
4
---
5
tcg/optimize.c | 27 ++++++++++++++++-----------
5
target/mips/tcg/nanomips_translate.c.inc | 5 +++--
6
1 file changed, 16 insertions(+), 11 deletions(-)
6
1 file changed, 3 insertions(+), 2 deletions(-)
7
7
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
8
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
9
index XXXXXXX..XXXXXXX 100644
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
10
--- a/target/mips/tcg/nanomips_translate.c.inc
11
+++ b/tcg/optimize.c
11
+++ b/target/mips/tcg/nanomips_translate.c.inc
12
@@ -XXX,XX +XXX,XX @@ static bool fold_brcond2(OptContext *ctx, TCGOp *op)
12
@@ -XXX,XX +XXX,XX @@ static void gen_llwp(DisasContext *ctx, uint32_t base, int16_t offset,
13
return false;
13
TCGv tmp2 = tcg_temp_new();
14
}
14
15
15
gen_base_offset_addr(ctx, taddr, base, offset);
16
+static bool fold_bswap(OptContext *ctx, TCGOp *op)
16
- tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ);
17
+{
17
+ tcg_gen_qemu_ld_i64(tval, taddr, ctx->mem_idx, MO_TEUQ | MO_ALIGN);
18
+ if (arg_is_const(op->args[1])) {
18
if (cpu_is_bigendian(ctx)) {
19
+ uint64_t t = arg_info(op->args[1])->val;
19
tcg_gen_extr_i64_tl(tmp2, tmp1, tval);
20
+
20
} else {
21
+ t = do_constant_folding(op->opc, t, op->args[2]);
21
@@ -XXX,XX +XXX,XX @@ static void gen_scwp(DisasContext *ctx, uint32_t base, int16_t offset,
22
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t);
22
23
+ }
23
tcg_gen_ld_i64(llval, cpu_env, offsetof(CPUMIPSState, llval_wp));
24
+ return false;
24
tcg_gen_atomic_cmpxchg_i64(val, taddr, llval, tval,
25
+}
25
- eva ? MIPS_HFLAG_UM : ctx->mem_idx, MO_64);
26
+
26
+ eva ? MIPS_HFLAG_UM : ctx->mem_idx,
27
static bool fold_call(OptContext *ctx, TCGOp *op)
27
+ MO_64 | MO_ALIGN);
28
{
28
if (reg1 != 0) {
29
TCGContext *s = ctx->tcg;
29
tcg_gen_movi_tl(cpu_gpr[reg1], 1);
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
30
}
31
}
32
break;
33
34
- CASE_OP_32_64(bswap16):
35
- CASE_OP_32_64(bswap32):
36
- case INDEX_op_bswap64_i64:
37
- if (arg_is_const(op->args[1])) {
38
- tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
39
- op->args[2]);
40
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
41
- continue;
42
- }
43
- break;
44
-
45
default:
46
break;
47
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
case INDEX_op_brcond2_i32:
50
done = fold_brcond2(&ctx, op);
51
break;
52
+ CASE_OP_32_64(bswap16):
53
+ CASE_OP_32_64(bswap32):
54
+ case INDEX_op_bswap64_i64:
55
+ done = fold_bswap(&ctx, op);
56
+ break;
57
CASE_OP_32_64(clz):
58
CASE_OP_32_64(ctz):
59
done = fold_count_zeros(&ctx, op);
60
--
31
--
61
2.25.1
32
2.34.1
62
63
1
From: Luis Pires <luis.pires@eldorado.org.br>
1
Memory operations that are not already aligned, or otherwise
2
marked up, require addition of ctx->default_tcg_memop_mask.
2
3
3
In preparation for changing the divu128/divs128 implementations
4
to allow for quotients larger than 64 bits, move the div-by-zero
5
and overflow checks to the callers.
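(Sketch, not from the patch: how a caller is expected to use divu128() after this change. The checked wrapper and the example values are hypothetical; the divu128() body below just mirrors the CONFIG_INT128 branch added by the patch so the sketch is self-contained.)

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same behaviour as the patch's CONFIG_INT128 implementation. */
    static void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
    {
        __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
        *plow  = dividend / divisor;
        *phigh = dividend % divisor;
    }

    /* Hypothetical helper showing the new caller-side contract: the caller,
     * not divu128(), rejects div-by-zero and a quotient wider than 64 bits. */
    static bool div128_checked(uint64_t *lo, uint64_t *hi, uint64_t divisor)
    {
        if (divisor == 0 || *hi >= divisor) {
            return false;                 /* undefined for divu128() itself */
        }
        divu128(lo, hi, divisor);         /* quotient in *lo, remainder in *hi */
        return true;
    }

    int main(void)
    {
        uint64_t lo = 5, hi = 1;          /* dividend = 2^64 + 5 */
        if (div128_checked(&lo, &hi, 16)) {
            printf("quotient %" PRIu64 " remainder %" PRIu64 "\n", lo, hi);
        }
        return 0;
    }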
6
7
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
8
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
9
Message-Id: <20211025191154.350831-2-luis.pires@eldorado.org.br>
10
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
---
5
---
12
include/hw/clock.h | 5 +++--
6
target/mips/tcg/mxu_translate.c | 3 ++-
13
include/qemu/host-utils.h | 34 ++++++++++++---------------------
7
target/mips/tcg/micromips_translate.c.inc | 24 ++++++++++++++--------
14
target/ppc/int_helper.c | 14 +++++++++-----
8
target/mips/tcg/mips16e_translate.c.inc | 18 ++++++++++------
15
util/host-utils.c | 40 ++++++++++++++++++---------------------
9
target/mips/tcg/nanomips_translate.c.inc | 25 +++++++++++------------
16
4 files changed, 42 insertions(+), 51 deletions(-)
10
4 files changed, 42 insertions(+), 28 deletions(-)
17
11
18
diff --git a/include/hw/clock.h b/include/hw/clock.h
12
diff --git a/target/mips/tcg/mxu_translate.c b/target/mips/tcg/mxu_translate.c
19
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
20
--- a/include/hw/clock.h
14
--- a/target/mips/tcg/mxu_translate.c
21
+++ b/include/hw/clock.h
15
+++ b/target/mips/tcg/mxu_translate.c
22
@@ -XXX,XX +XXX,XX @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
16
@@ -XXX,XX +XXX,XX @@ static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
23
return 0;
17
tcg_gen_ori_tl(t1, t1, 0xFFFFF000);
24
}
18
}
25
/*
19
tcg_gen_add_tl(t1, t0, t1);
26
- * Ignore divu128() return value as we've caught div-by-zero and don't
20
- tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_TESL ^ (sel * MO_BSWAP));
27
- * need different behaviour for overflow.
21
+ tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, (MO_TESL ^ (sel * MO_BSWAP)) |
28
+ * BUG: when CONFIG_INT128 is not defined, the current implementation of
22
+ ctx->default_tcg_memop_mask);
29
+ * divu128 does not return a valid truncated quotient, so the result will
23
30
+ * be wrong.
24
gen_store_mxu_gpr(t1, XRa);
31
*/
32
divu128(&lo, &hi, clk->period);
33
return lo;
34
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
35
index XXXXXXX..XXXXXXX 100644
36
--- a/include/qemu/host-utils.h
37
+++ b/include/qemu/host-utils.h
38
@@ -XXX,XX +XXX,XX @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
39
return (__int128_t)a * b / c;
40
}
25
}
41
26
diff --git a/target/mips/tcg/micromips_translate.c.inc b/target/mips/tcg/micromips_translate.c.inc
42
-static inline int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
27
index XXXXXXX..XXXXXXX 100644
43
+static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
28
--- a/target/mips/tcg/micromips_translate.c.inc
44
{
29
+++ b/target/mips/tcg/micromips_translate.c.inc
45
- if (divisor == 0) {
30
@@ -XXX,XX +XXX,XX @@ static void gen_ldst_pair(DisasContext *ctx, uint32_t opc, int rd,
46
- return 1;
31
gen_reserved_instruction(ctx);
47
- } else {
32
return;
48
- __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
33
}
49
- __uint128_t result = dividend / divisor;
34
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL);
50
- *plow = result;
35
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL |
51
- *phigh = dividend % divisor;
36
+ ctx->default_tcg_memop_mask);
52
- return result > UINT64_MAX;
37
gen_store_gpr(t1, rd);
53
- }
38
tcg_gen_movi_tl(t1, 4);
54
+ __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
39
gen_op_addr_add(ctx, t0, t0, t1);
55
+ __uint128_t result = dividend / divisor;
40
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL);
56
+ *plow = result;
41
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL |
57
+ *phigh = dividend % divisor;
42
+ ctx->default_tcg_memop_mask);
58
}
43
gen_store_gpr(t1, rd + 1);
59
44
break;
60
-static inline int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
45
case SWP:
61
+static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
46
gen_load_gpr(t1, rd);
62
{
47
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
63
- if (divisor == 0) {
48
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
64
- return 1;
49
+ ctx->default_tcg_memop_mask);
65
- } else {
50
tcg_gen_movi_tl(t1, 4);
66
- __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
51
gen_op_addr_add(ctx, t0, t0, t1);
67
- __int128_t result = dividend / divisor;
52
gen_load_gpr(t1, rd + 1);
68
- *plow = result;
53
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
69
- *phigh = dividend % divisor;
54
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
70
- return result != *plow;
55
+ ctx->default_tcg_memop_mask);
71
- }
56
break;
72
+ __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
57
#ifdef TARGET_MIPS64
73
+ __int128_t result = dividend / divisor;
58
case LDP:
74
+ *plow = result;
59
@@ -XXX,XX +XXX,XX @@ static void gen_ldst_pair(DisasContext *ctx, uint32_t opc, int rd,
75
+ *phigh = dividend % divisor;
60
gen_reserved_instruction(ctx);
76
}
61
return;
77
#else
62
}
78
void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
63
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
79
void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
64
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
80
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
65
+ ctx->default_tcg_memop_mask);
81
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
66
gen_store_gpr(t1, rd);
82
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
67
tcg_gen_movi_tl(t1, 8);
83
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
68
gen_op_addr_add(ctx, t0, t0, t1);
84
69
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
85
static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
70
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
86
{
71
+ ctx->default_tcg_memop_mask);
87
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
72
gen_store_gpr(t1, rd + 1);
88
index XXXXXXX..XXXXXXX 100644
73
break;
89
--- a/target/ppc/int_helper.c
74
case SDP:
90
+++ b/target/ppc/int_helper.c
75
gen_load_gpr(t1, rd);
91
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
76
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
92
uint64_t rt = 0;
77
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
93
int overflow = 0;
78
+ ctx->default_tcg_memop_mask);
94
79
tcg_gen_movi_tl(t1, 8);
95
- overflow = divu128(&rt, &ra, rb);
80
gen_op_addr_add(ctx, t0, t0, t1);
96
-
81
gen_load_gpr(t1, rd + 1);
97
- if (unlikely(overflow)) {
82
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ);
98
+ if (unlikely(rb == 0 || ra >= rb)) {
83
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUQ |
99
+ overflow = 1;
84
+ ctx->default_tcg_memop_mask);
100
rt = 0; /* Undefined */
85
break;
101
+ } else {
86
#endif
102
+ divu128(&rt, &ra, rb);
103
}
87
}
104
88
diff --git a/target/mips/tcg/mips16e_translate.c.inc b/target/mips/tcg/mips16e_translate.c.inc
105
if (oe) {
89
index XXXXXXX..XXXXXXX 100644
106
@@ -XXX,XX +XXX,XX @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
90
--- a/target/mips/tcg/mips16e_translate.c.inc
107
int64_t rt = 0;
91
+++ b/target/mips/tcg/mips16e_translate.c.inc
108
int64_t ra = (int64_t)rau;
92
@@ -XXX,XX +XXX,XX @@ static void gen_mips16_save(DisasContext *ctx,
109
int64_t rb = (int64_t)rbu;
93
case 4:
110
- int overflow = divs128(&rt, &ra, rb);
94
gen_base_offset_addr(ctx, t0, 29, 12);
111
+ int overflow = 0;
95
gen_load_gpr(t1, 7);
112
96
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
113
- if (unlikely(overflow)) {
97
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
114
+ if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
98
+ ctx->default_tcg_memop_mask);
115
+ overflow = 1;
99
/* Fall through */
116
rt = 0; /* Undefined */
100
case 3:
117
+ } else {
101
gen_base_offset_addr(ctx, t0, 29, 8);
118
+ divs128(&rt, &ra, rb);
102
gen_load_gpr(t1, 6);
103
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
104
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
105
+ ctx->default_tcg_memop_mask);
106
/* Fall through */
107
case 2:
108
gen_base_offset_addr(ctx, t0, 29, 4);
109
gen_load_gpr(t1, 5);
110
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
111
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
112
+ ctx->default_tcg_memop_mask);
113
/* Fall through */
114
case 1:
115
gen_base_offset_addr(ctx, t0, 29, 0);
116
gen_load_gpr(t1, 4);
117
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
118
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL |
119
+ ctx->default_tcg_memop_mask);
119
}
120
}
120
121
121
if (oe) {
122
gen_load_gpr(t0, 29);
122
diff --git a/util/host-utils.c b/util/host-utils.c
123
@@ -XXX,XX +XXX,XX @@ static void gen_mips16_save(DisasContext *ctx,
123
index XXXXXXX..XXXXXXX 100644
124
tcg_gen_movi_tl(t2, -4); \
124
--- a/util/host-utils.c
125
gen_op_addr_add(ctx, t0, t0, t2); \
125
+++ b/util/host-utils.c
126
gen_load_gpr(t1, reg); \
126
@@ -XXX,XX +XXX,XX @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
127
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL); \
127
*phigh = rh;
128
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL | \
128
}
129
+ ctx->default_tcg_memop_mask); \
129
130
} while (0)
130
-/* Unsigned 128x64 division. Returns 1 if overflow (divide by zero or */
131
131
-/* quotient exceeds 64 bits). Otherwise returns quotient via plow and */
132
if (do_ra) {
132
-/* remainder via phigh. */
133
@@ -XXX,XX +XXX,XX @@ static void gen_mips16_restore(DisasContext *ctx,
133
-int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
134
#define DECR_AND_LOAD(reg) do { \
134
+/*
135
tcg_gen_movi_tl(t2, -4); \
135
+ * Unsigned 128-by-64 division. Returns quotient via plow and
136
gen_op_addr_add(ctx, t0, t0, t2); \
136
+ * remainder via phigh.
137
- tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL); \
137
+ * The result must fit in 64 bits (plow) - otherwise, the result
138
+ tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_TESL | \
138
+ * is undefined.
139
+ ctx->default_tcg_memop_mask); \
139
+ * This function will cause a division by zero if passed a zero divisor.
140
gen_store_gpr(t1, reg); \
140
+ */
141
} while (0)
141
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
142
142
{
143
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
143
uint64_t dhi = *phigh;
144
index XXXXXXX..XXXXXXX 100644
144
uint64_t dlo = *plow;
145
--- a/target/mips/tcg/nanomips_translate.c.inc
145
unsigned i;
146
+++ b/target/mips/tcg/nanomips_translate.c.inc
146
uint64_t carry = 0;
147
@@ -XXX,XX +XXX,XX @@ static void gen_p_lsx(DisasContext *ctx, int rd, int rs, int rt)
147
148
148
- if (divisor == 0) {
149
switch (extract32(ctx->opcode, 7, 4)) {
149
- return 1;
150
case NM_LBX:
150
- } else if (dhi == 0) {
151
- tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
151
+ if (divisor == 0 || dhi == 0) {
152
- MO_SB);
152
*plow = dlo / divisor;
153
+ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SB);
153
*phigh = dlo % divisor;
154
gen_store_gpr(t0, rd);
154
- return 0;
155
break;
155
- } else if (dhi >= divisor) {
156
case NM_LHX:
156
- return 1;
157
/*case NM_LHXS:*/
157
} else {
158
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
158
159
- MO_TESW);
159
for (i = 0; i < 64; i++) {
160
+ MO_TESW | ctx->default_tcg_memop_mask);
160
@@ -XXX,XX +XXX,XX @@ int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
161
gen_store_gpr(t0, rd);
161
162
break;
162
*plow = dlo;
163
case NM_LWX:
163
*phigh = dhi;
164
/*case NM_LWXS:*/
164
- return 0;
165
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
165
}
166
- MO_TESL);
166
}
167
+ MO_TESL | ctx->default_tcg_memop_mask);
167
168
gen_store_gpr(t0, rd);
168
-int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
169
break;
169
+/*
170
case NM_LBUX:
170
+ * Signed 128-by-64 division. Returns quotient via plow and
171
- tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
171
+ * remainder via phigh.
172
- MO_UB);
172
+ * The result must fit in 64 bits (plow) - otherwise, the result
173
+ tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_UB);
173
+ * is undefined.
174
gen_store_gpr(t0, rd);
174
+ * This function will cause a division by zero if passed a zero divisor.
175
break;
175
+ */
176
case NM_LHUX:
176
+void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
177
/*case NM_LHUXS:*/
177
{
178
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx,
178
int sgn_dvdnd = *phigh < 0;
179
- MO_TEUW);
179
int sgn_divsr = divisor < 0;
180
+ MO_TEUW | ctx->default_tcg_memop_mask);
180
- int overflow = 0;
181
gen_store_gpr(t0, rd);
181
182
break;
182
if (sgn_dvdnd) {
183
case NM_SBX:
183
*plow = ~(*plow);
184
check_nms(ctx);
184
@@ -XXX,XX +XXX,XX @@ int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
185
gen_load_gpr(t1, rd);
185
divisor = 0 - divisor;
186
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
186
}
187
- MO_8);
187
188
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_8);
188
- overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
189
break;
189
+ divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
190
case NM_SHX:
190
191
/*case NM_SHXS:*/
191
if (sgn_dvdnd ^ sgn_divsr) {
192
check_nms(ctx);
192
*plow = 0 - *plow;
193
gen_load_gpr(t1, rd);
193
}
194
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
194
-
195
- MO_TEUW);
195
- if (!overflow) {
196
+ MO_TEUW | ctx->default_tcg_memop_mask);
196
- if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
197
break;
197
- overflow = 1;
198
case NM_SWX:
198
- }
199
/*case NM_SWXS:*/
199
- }
200
check_nms(ctx);
200
-
201
gen_load_gpr(t1, rd);
201
- return overflow;
202
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
202
}
203
- MO_TEUL);
203
#endif
204
+ MO_TEUL | ctx->default_tcg_memop_mask);
204
205
break;
206
case NM_LWC1X:
207
/*case NM_LWC1XS:*/
208
@@ -XXX,XX +XXX,XX @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
209
addr_off);
210
211
tcg_gen_movi_tl(t0, addr);
212
- tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx, MO_TESL);
213
+ tcg_gen_qemu_ld_tl(cpu_gpr[rt], t0, ctx->mem_idx,
214
+ MO_TESL | ctx->default_tcg_memop_mask);
215
}
216
break;
217
case NM_SWPC48:
218
@@ -XXX,XX +XXX,XX @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
219
tcg_gen_movi_tl(t0, addr);
220
gen_load_gpr(t1, rt);
221
222
- tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_TEUL);
223
+ tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
224
+ MO_TEUL | ctx->default_tcg_memop_mask);
225
}
226
break;
227
default:
205
--
228
--
206
2.25.1
229
2.34.1
207
208
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
The opposite of MO_UNALN is MO_ALIGN.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
---
5
tcg/optimize.c | 25 +++++++++++++++----------
6
target/mips/tcg/nanomips_translate.c.inc | 2 +-
6
1 file changed, 15 insertions(+), 10 deletions(-)
7
1 file changed, 1 insertion(+), 1 deletion(-)
7
8
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/target/mips/tcg/nanomips_translate.c.inc b/target/mips/tcg/nanomips_translate.c.inc
9
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
--- a/target/mips/tcg/nanomips_translate.c.inc
11
+++ b/tcg/optimize.c
12
+++ b/target/mips/tcg/nanomips_translate.c.inc
12
@@ -XXX,XX +XXX,XX @@ static bool fold_ctpop(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@ static int decode_nanomips_32_48_opc(CPUMIPSState *env, DisasContext *ctx)
13
return fold_const1(ctx, op);
14
TCGv va = tcg_temp_new();
14
}
15
TCGv t1 = tcg_temp_new();
15
16
MemOp memop = (extract32(ctx->opcode, 8, 3)) ==
16
+static bool fold_deposit(OptContext *ctx, TCGOp *op)
17
- NM_P_LS_UAWM ? MO_UNALN : 0;
17
+{
18
+ NM_P_LS_UAWM ? MO_UNALN : MO_ALIGN;
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
19
19
+ uint64_t t1 = arg_info(op->args[1])->val;
20
count = (count == 0) ? 8 : count;
20
+ uint64_t t2 = arg_info(op->args[2])->val;
21
while (counter != count) {
21
+
22
+ t1 = deposit64(t1, op->args[3], op->args[4], t2);
23
+ return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
24
+ }
25
+ return false;
26
+}
27
+
28
static bool fold_divide(OptContext *ctx, TCGOp *op)
29
{
30
return fold_const2(ctx, op);
31
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
32
}
33
break;
34
35
- CASE_OP_32_64(deposit):
36
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
37
- tmp = deposit64(arg_info(op->args[1])->val,
38
- op->args[3], op->args[4],
39
- arg_info(op->args[2])->val);
40
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
41
- continue;
42
- }
43
- break;
44
-
45
default:
46
break;
47
48
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
CASE_OP_32_64(ctpop):
50
done = fold_ctpop(&ctx, op);
51
break;
52
+ CASE_OP_32_64(deposit):
53
+ done = fold_deposit(&ctx, op);
54
+ break;
55
CASE_OP_32_64(div):
56
CASE_OP_32_64(divu):
57
done = fold_divide(&ctx, op);
58
--
22
--
59
2.25.1
23
2.34.1
60
24
61
25
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
2
---
5
tcg/optimize.c | 39 ++++++++++++++++++++++-----------------
3
configs/targets/mips-linux-user.mak | 1 -
6
1 file changed, 22 insertions(+), 17 deletions(-)
4
configs/targets/mips-softmmu.mak | 1 -
5
configs/targets/mips64-linux-user.mak | 1 -
6
configs/targets/mips64-softmmu.mak | 1 -
7
configs/targets/mips64el-linux-user.mak | 1 -
8
configs/targets/mips64el-softmmu.mak | 1 -
9
configs/targets/mipsel-linux-user.mak | 1 -
10
configs/targets/mipsel-softmmu.mak | 1 -
11
configs/targets/mipsn32-linux-user.mak | 1 -
12
configs/targets/mipsn32el-linux-user.mak | 1 -
13
10 files changed, 10 deletions(-)
7
14
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
diff --git a/configs/targets/mips-linux-user.mak b/configs/targets/mips-linux-user.mak
9
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
17
--- a/configs/targets/mips-linux-user.mak
11
+++ b/tcg/optimize.c
18
+++ b/configs/targets/mips-linux-user.mak
12
@@ -XXX,XX +XXX,XX @@ static bool fold_eqv(OptContext *ctx, TCGOp *op)
19
@@ -XXX,XX +XXX,XX @@ TARGET_ARCH=mips
13
return fold_const2(ctx, op);
20
TARGET_ABI_MIPSO32=y
14
}
21
TARGET_SYSTBL_ABI=o32
15
22
TARGET_SYSTBL=syscall_o32.tbl
16
+static bool fold_extract2(OptContext *ctx, TCGOp *op)
23
-TARGET_ALIGNED_ONLY=y
17
+{
24
TARGET_BIG_ENDIAN=y
18
+ if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
25
diff --git a/configs/targets/mips-softmmu.mak b/configs/targets/mips-softmmu.mak
19
+ uint64_t v1 = arg_info(op->args[1])->val;
26
index XXXXXXX..XXXXXXX 100644
20
+ uint64_t v2 = arg_info(op->args[2])->val;
27
--- a/configs/targets/mips-softmmu.mak
21
+ int shr = op->args[3];
28
+++ b/configs/targets/mips-softmmu.mak
22
+
29
@@ -XXX,XX +XXX,XX @@
23
+ if (op->opc == INDEX_op_extract2_i64) {
30
TARGET_ARCH=mips
24
+ v1 >>= shr;
31
-TARGET_ALIGNED_ONLY=y
25
+ v2 <<= 64 - shr;
32
TARGET_BIG_ENDIAN=y
26
+ } else {
33
TARGET_SUPPORTS_MTTCG=y
27
+ v1 = (uint32_t)v1 >> shr;
34
diff --git a/configs/targets/mips64-linux-user.mak b/configs/targets/mips64-linux-user.mak
28
+ v2 = (int32_t)v2 << (32 - shr);
35
index XXXXXXX..XXXXXXX 100644
29
+ }
36
--- a/configs/targets/mips64-linux-user.mak
30
+ return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
37
+++ b/configs/targets/mips64-linux-user.mak
31
+ }
38
@@ -XXX,XX +XXX,XX @@ TARGET_ABI_MIPSN64=y
32
+ return false;
39
TARGET_BASE_ARCH=mips
33
+}
40
TARGET_SYSTBL_ABI=n64
34
+
41
TARGET_SYSTBL=syscall_n64.tbl
35
static bool fold_exts(OptContext *ctx, TCGOp *op)
42
-TARGET_ALIGNED_ONLY=y
36
{
43
TARGET_BIG_ENDIAN=y
37
return fold_const1(ctx, op);
44
diff --git a/configs/targets/mips64-softmmu.mak b/configs/targets/mips64-softmmu.mak
38
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
45
index XXXXXXX..XXXXXXX 100644
39
}
46
--- a/configs/targets/mips64-softmmu.mak
40
break;
47
+++ b/configs/targets/mips64-softmmu.mak
41
48
@@ -XXX,XX +XXX,XX @@
42
- CASE_OP_32_64(extract2):
49
TARGET_ARCH=mips64
43
- if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
50
TARGET_BASE_ARCH=mips
44
- uint64_t v1 = arg_info(op->args[1])->val;
51
-TARGET_ALIGNED_ONLY=y
45
- uint64_t v2 = arg_info(op->args[2])->val;
52
TARGET_BIG_ENDIAN=y
46
- int shr = op->args[3];
53
diff --git a/configs/targets/mips64el-linux-user.mak b/configs/targets/mips64el-linux-user.mak
47
-
54
index XXXXXXX..XXXXXXX 100644
48
- if (opc == INDEX_op_extract2_i64) {
55
--- a/configs/targets/mips64el-linux-user.mak
49
- tmp = (v1 >> shr) | (v2 << (64 - shr));
56
+++ b/configs/targets/mips64el-linux-user.mak
50
- } else {
57
@@ -XXX,XX +XXX,XX @@ TARGET_ABI_MIPSN64=y
51
- tmp = (int32_t)(((uint32_t)v1 >> shr) |
58
TARGET_BASE_ARCH=mips
52
- ((uint32_t)v2 << (32 - shr)));
59
TARGET_SYSTBL_ABI=n64
53
- }
60
TARGET_SYSTBL=syscall_n64.tbl
54
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
61
-TARGET_ALIGNED_ONLY=y
55
- continue;
62
diff --git a/configs/targets/mips64el-softmmu.mak b/configs/targets/mips64el-softmmu.mak
56
- }
63
index XXXXXXX..XXXXXXX 100644
57
- break;
64
--- a/configs/targets/mips64el-softmmu.mak
58
-
65
+++ b/configs/targets/mips64el-softmmu.mak
59
default:
66
@@ -XXX,XX +XXX,XX @@
60
break;
67
TARGET_ARCH=mips64
61
68
TARGET_BASE_ARCH=mips
62
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
69
-TARGET_ALIGNED_ONLY=y
63
CASE_OP_32_64(eqv):
70
TARGET_NEED_FDT=y
64
done = fold_eqv(&ctx, op);
71
diff --git a/configs/targets/mipsel-linux-user.mak b/configs/targets/mipsel-linux-user.mak
65
break;
72
index XXXXXXX..XXXXXXX 100644
66
+ CASE_OP_32_64(extract2):
73
--- a/configs/targets/mipsel-linux-user.mak
67
+ done = fold_extract2(&ctx, op);
74
+++ b/configs/targets/mipsel-linux-user.mak
68
+ break;
75
@@ -XXX,XX +XXX,XX @@ TARGET_ARCH=mips
69
CASE_OP_32_64(ext8s):
76
TARGET_ABI_MIPSO32=y
70
CASE_OP_32_64(ext16s):
77
TARGET_SYSTBL_ABI=o32
71
case INDEX_op_ext32s_i64:
78
TARGET_SYSTBL=syscall_o32.tbl
79
-TARGET_ALIGNED_ONLY=y
80
diff --git a/configs/targets/mipsel-softmmu.mak b/configs/targets/mipsel-softmmu.mak
81
index XXXXXXX..XXXXXXX 100644
82
--- a/configs/targets/mipsel-softmmu.mak
83
+++ b/configs/targets/mipsel-softmmu.mak
84
@@ -XXX,XX +XXX,XX @@
85
TARGET_ARCH=mips
86
-TARGET_ALIGNED_ONLY=y
87
TARGET_SUPPORTS_MTTCG=y
88
diff --git a/configs/targets/mipsn32-linux-user.mak b/configs/targets/mipsn32-linux-user.mak
89
index XXXXXXX..XXXXXXX 100644
90
--- a/configs/targets/mipsn32-linux-user.mak
91
+++ b/configs/targets/mipsn32-linux-user.mak
92
@@ -XXX,XX +XXX,XX @@ TARGET_ABI32=y
93
TARGET_BASE_ARCH=mips
94
TARGET_SYSTBL_ABI=n32
95
TARGET_SYSTBL=syscall_n32.tbl
96
-TARGET_ALIGNED_ONLY=y
97
TARGET_BIG_ENDIAN=y
98
diff --git a/configs/targets/mipsn32el-linux-user.mak b/configs/targets/mipsn32el-linux-user.mak
99
index XXXXXXX..XXXXXXX 100644
100
--- a/configs/targets/mipsn32el-linux-user.mak
101
+++ b/configs/targets/mipsn32el-linux-user.mak
102
@@ -XXX,XX +XXX,XX @@ TARGET_ABI32=y
103
TARGET_BASE_ARCH=mips
104
TARGET_SYSTBL_ABI=n32
105
TARGET_SYSTBL=syscall_n32.tbl
106
-TARGET_ALIGNED_ONLY=y
72
--
107
--
73
2.25.1
108
2.34.1
74
75
1
Add two additional helpers, fold_add2_i32 and fold_sub2_i32,
1
In gen_ldx/gen_stx, the only two locations for memory operations,
2
which will not be simple wrappers forever.
2
mark the operation as either aligned (softmmu) or unaligned
3
(user-only, as if emulated by the kernel).
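(Worked example, not part of either patch, of the constant folding that fold_add2_i32 described above performs: the 32-bit halves of each operand pair are recombined into 64-bit values, added, and split back, so a carry out of the low half shows up in the high output. The constants are hypothetical.)

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t al = 0xffffffffu, ah = 1;   /* first operand, low/high  */
        uint32_t bl = 1,           bh = 0;   /* second operand, low/high */

        uint64_t a = ((uint64_t)ah << 32) | al;
        uint64_t b = ((uint64_t)bh << 32) | bl;
        a += b;                              /* 0x1ffffffff + 1 = 0x200000000 */

        uint32_t rl = (uint32_t)a;           /* 0x0: low result           */
        uint32_t rh = (uint32_t)(a >> 32);   /* 0x2: carry reached high   */
        printf("rl=%#" PRIx32 " rh=%#" PRIx32 "\n", rl, rh);
        return 0;
    }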
3
4
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 70 +++++++++++++++++++++++++++++++-------------------
8
configs/targets/nios2-softmmu.mak | 1 -
9
1 file changed, 44 insertions(+), 26 deletions(-)
9
target/nios2/translate.c | 10 ++++++++++
10
2 files changed, 10 insertions(+), 1 deletion(-)
10
11
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/configs/targets/nios2-softmmu.mak b/configs/targets/nios2-softmmu.mak
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
--- a/configs/targets/nios2-softmmu.mak
14
+++ b/tcg/optimize.c
15
+++ b/configs/targets/nios2-softmmu.mak
15
@@ -XXX,XX +XXX,XX @@ static bool fold_add(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@
16
return fold_const2(ctx, op);
17
TARGET_ARCH=nios2
18
-TARGET_ALIGNED_ONLY=y
19
TARGET_NEED_FDT=y
20
diff --git a/target/nios2/translate.c b/target/nios2/translate.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/target/nios2/translate.c
23
+++ b/target/nios2/translate.c
24
@@ -XXX,XX +XXX,XX @@ static void gen_ldx(DisasContext *dc, uint32_t code, uint32_t flags)
25
TCGv data = dest_gpr(dc, instr.b);
26
27
tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s);
28
+#ifdef CONFIG_USER_ONLY
29
+ flags |= MO_UNALN;
30
+#else
31
+ flags |= MO_ALIGN;
32
+#endif
33
tcg_gen_qemu_ld_tl(data, addr, dc->mem_idx, flags);
17
}
34
}
18
35
19
+static bool fold_addsub2_i32(OptContext *ctx, TCGOp *op, bool add)
36
@@ -XXX,XX +XXX,XX @@ static void gen_stx(DisasContext *dc, uint32_t code, uint32_t flags)
20
+{
37
21
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3]) &&
38
TCGv addr = tcg_temp_new();
22
+ arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
39
tcg_gen_addi_tl(addr, load_gpr(dc, instr.a), instr.imm16.s);
23
+ uint32_t al = arg_info(op->args[2])->val;
40
+#ifdef CONFIG_USER_ONLY
24
+ uint32_t ah = arg_info(op->args[3])->val;
41
+ flags |= MO_UNALN;
25
+ uint32_t bl = arg_info(op->args[4])->val;
42
+#else
26
+ uint32_t bh = arg_info(op->args[5])->val;
43
+ flags |= MO_ALIGN;
27
+ uint64_t a = ((uint64_t)ah << 32) | al;
44
+#endif
28
+ uint64_t b = ((uint64_t)bh << 32) | bl;
45
tcg_gen_qemu_st_tl(val, addr, dc->mem_idx, flags);
29
+ TCGArg rl, rh;
30
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
31
+
32
+ if (add) {
33
+ a += b;
34
+ } else {
35
+ a -= b;
36
+ }
37
+
38
+ rl = op->args[0];
39
+ rh = op->args[1];
40
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)a);
41
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(a >> 32));
42
+ return true;
43
+ }
44
+ return false;
45
+}
46
+
47
+static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
48
+{
49
+ return fold_addsub2_i32(ctx, op, true);
50
+}
51
+
52
static bool fold_and(OptContext *ctx, TCGOp *op)
53
{
54
return fold_const2(ctx, op);
55
@@ -XXX,XX +XXX,XX @@ static bool fold_sub(OptContext *ctx, TCGOp *op)
56
return fold_const2(ctx, op);
57
}
46
}
58
47
59
+static bool fold_sub2_i32(OptContext *ctx, TCGOp *op)
60
+{
61
+ return fold_addsub2_i32(ctx, op, false);
62
+}
63
+
64
static bool fold_xor(OptContext *ctx, TCGOp *op)
65
{
66
return fold_const2(ctx, op);
67
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
68
}
69
break;
70
71
- case INDEX_op_add2_i32:
72
- case INDEX_op_sub2_i32:
73
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
74
- && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
75
- uint32_t al = arg_info(op->args[2])->val;
76
- uint32_t ah = arg_info(op->args[3])->val;
77
- uint32_t bl = arg_info(op->args[4])->val;
78
- uint32_t bh = arg_info(op->args[5])->val;
79
- uint64_t a = ((uint64_t)ah << 32) | al;
80
- uint64_t b = ((uint64_t)bh << 32) | bl;
81
- TCGArg rl, rh;
82
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
83
-
84
- if (opc == INDEX_op_add2_i32) {
85
- a += b;
86
- } else {
87
- a -= b;
88
- }
89
-
90
- rl = op->args[0];
91
- rh = op->args[1];
92
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)a);
93
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(a >> 32));
94
- continue;
95
- }
96
- break;
97
98
default:
99
break;
100
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
101
CASE_OP_32_64_VEC(add):
102
done = fold_add(&ctx, op);
103
break;
104
+ case INDEX_op_add2_i32:
105
+ done = fold_add2_i32(&ctx, op);
106
+ break;
107
CASE_OP_32_64_VEC(and):
108
done = fold_and(&ctx, op);
109
break;
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
CASE_OP_32_64_VEC(sub):
112
done = fold_sub(&ctx, op);
113
break;
114
+ case INDEX_op_sub2_i32:
115
+ done = fold_sub2_i32(&ctx, op);
116
+ break;
117
CASE_OP_32_64_VEC(xor):
118
done = fold_xor(&ctx, op);
119
break;
120
--
48
--
121
2.25.1
49
2.34.1
122
50
123
51
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Mark with MO_ALIGN all memory operations that are not already marked MO_UNALN.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
---
5
tcg/optimize.c | 56 ++++++++++++++++++++++++++++----------------------
6
target/sh4/translate.c | 102 ++++++++++++++++++++++++++---------------
6
1 file changed, 31 insertions(+), 25 deletions(-)
7
1 file changed, 66 insertions(+), 36 deletions(-)
7
8
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
9
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
9
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
11
--- a/target/sh4/translate.c
11
+++ b/tcg/optimize.c
12
+++ b/target/sh4/translate.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mb(OptContext *ctx, TCGOp *op)
13
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
13
return true;
14
case 0x9000:        /* mov.w @(disp,PC),Rn */
14
}
15
    {
15
16
TCGv addr = tcg_constant_i32(ctx->base.pc_next + 4 + B7_0 * 2);
16
+static bool fold_movcond(OptContext *ctx, TCGOp *op)
17
- tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESW);
17
+{
18
+ tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
18
+ TCGOpcode opc = op->opc;
19
+ MO_TESW | MO_ALIGN);
19
+ TCGCond cond = op->args[5];
20
    }
20
+ int i = do_constant_folding_cond(opc, op->args[1], op->args[2], cond);
21
    return;
21
+
22
case 0xd000:        /* mov.l @(disp,PC),Rn */
22
+ if (i >= 0) {
23
    {
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
24
TCGv addr = tcg_constant_i32((ctx->base.pc_next + 4 + B7_0 * 4) & ~3);
24
+ }
25
- tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx, MO_TESL);
25
+
26
+ tcg_gen_qemu_ld_i32(REG(B11_8), addr, ctx->memidx,
26
+ if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
27
+ MO_TESL | MO_ALIGN);
27
+ uint64_t tv = arg_info(op->args[3])->val;
28
    }
28
+ uint64_t fv = arg_info(op->args[4])->val;
29
    return;
29
+
30
case 0x7000:        /* add #imm,Rn */
30
+ opc = (opc == INDEX_op_movcond_i32
31
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
31
+ ? INDEX_op_setcond_i32 : INDEX_op_setcond_i64);
32
    {
32
+
33
     TCGv arg0, arg1;
33
+ if (tv == 1 && fv == 0) {
34
     arg0 = tcg_temp_new();
34
+ op->opc = opc;
35
- tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
35
+ op->args[3] = cond;
36
+ tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx,
36
+ } else if (fv == 1 && tv == 0) {
37
+ MO_TESL | MO_ALIGN);
37
+ op->opc = opc;
38
     arg1 = tcg_temp_new();
38
+ op->args[3] = tcg_invert_cond(cond);
39
- tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
39
+ }
40
+ tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx,
40
+ }
41
+ MO_TESL | MO_ALIGN);
41
+ return false;
42
gen_helper_macl(cpu_env, arg0, arg1);
42
+}
43
     tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
43
+
44
     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
44
static bool fold_mul(OptContext *ctx, TCGOp *op)
45
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
45
{
46
    {
46
return fold_const2(ctx, op);
47
     TCGv arg0, arg1;
47
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
48
     arg0 = tcg_temp_new();
49
- tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx, MO_TESL);
50
+ tcg_gen_qemu_ld_i32(arg0, REG(B7_4), ctx->memidx,
51
+ MO_TESL | MO_ALIGN);
52
     arg1 = tcg_temp_new();
53
- tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx, MO_TESL);
54
+ tcg_gen_qemu_ld_i32(arg1, REG(B11_8), ctx->memidx,
55
+ MO_TESL | MO_ALIGN);
56
gen_helper_macw(cpu_env, arg0, arg1);
57
     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 2);
58
     tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 2);
59
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
60
if (ctx->tbflags & FPSCR_SZ) {
61
TCGv_i64 fp = tcg_temp_new_i64();
62
gen_load_fpr64(ctx, fp, XHACK(B7_4));
63
- tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx, MO_TEUQ);
64
+ tcg_gen_qemu_st_i64(fp, REG(B11_8), ctx->memidx,
65
+ MO_TEUQ | MO_ALIGN);
66
    } else {
67
- tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx, MO_TEUL);
68
+ tcg_gen_qemu_st_i32(FREG(B7_4), REG(B11_8), ctx->memidx,
69
+ MO_TEUL | MO_ALIGN);
70
    }
71
    return;
72
case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
73
    CHECK_FPU_ENABLED
74
if (ctx->tbflags & FPSCR_SZ) {
75
TCGv_i64 fp = tcg_temp_new_i64();
76
- tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ);
77
+ tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx,
78
+ MO_TEUQ | MO_ALIGN);
79
gen_store_fpr64(ctx, fp, XHACK(B11_8));
80
    } else {
81
- tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
82
+ tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx,
83
+ MO_TEUL | MO_ALIGN);
84
    }
85
    return;
86
case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
87
    CHECK_FPU_ENABLED
88
if (ctx->tbflags & FPSCR_SZ) {
89
TCGv_i64 fp = tcg_temp_new_i64();
90
- tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx, MO_TEUQ);
91
+ tcg_gen_qemu_ld_i64(fp, REG(B7_4), ctx->memidx,
92
+ MO_TEUQ | MO_ALIGN);
93
gen_store_fpr64(ctx, fp, XHACK(B11_8));
94
tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 8);
95
    } else {
96
- tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx, MO_TEUL);
97
+ tcg_gen_qemu_ld_i32(FREG(B11_8), REG(B7_4), ctx->memidx,
98
+ MO_TEUL | MO_ALIGN);
99
     tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
100
    }
101
    return;
102
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
103
TCGv_i64 fp = tcg_temp_new_i64();
104
gen_load_fpr64(ctx, fp, XHACK(B7_4));
105
tcg_gen_subi_i32(addr, REG(B11_8), 8);
106
- tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ);
107
+ tcg_gen_qemu_st_i64(fp, addr, ctx->memidx,
108
+ MO_TEUQ | MO_ALIGN);
109
} else {
110
tcg_gen_subi_i32(addr, REG(B11_8), 4);
111
- tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
112
+ tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx,
113
+ MO_TEUL | MO_ALIGN);
48
}
114
}
49
break;
115
tcg_gen_mov_i32(REG(B11_8), addr);
50
116
}
51
- CASE_OP_32_64(movcond):
117
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
52
- i = do_constant_folding_cond(opc, op->args[1],
118
     tcg_gen_add_i32(addr, REG(B7_4), REG(0));
53
- op->args[2], op->args[5]);
119
if (ctx->tbflags & FPSCR_SZ) {
54
- if (i >= 0) {
120
TCGv_i64 fp = tcg_temp_new_i64();
55
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
121
- tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx, MO_TEUQ);
56
- continue;
122
+ tcg_gen_qemu_ld_i64(fp, addr, ctx->memidx,
57
- }
123
+ MO_TEUQ | MO_ALIGN);
58
- if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
124
gen_store_fpr64(ctx, fp, XHACK(B11_8));
59
- uint64_t tv = arg_info(op->args[3])->val;
125
     } else {
60
- uint64_t fv = arg_info(op->args[4])->val;
126
- tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx, MO_TEUL);
61
- TCGCond cond = op->args[5];
127
+ tcg_gen_qemu_ld_i32(FREG(B11_8), addr, ctx->memidx,
62
-
128
+ MO_TEUL | MO_ALIGN);
63
- if (fv == 1 && tv == 0) {
129
     }
64
- cond = tcg_invert_cond(cond);
130
    }
65
- } else if (!(tv == 1 && fv == 0)) {
131
    return;
66
- break;
132
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
67
- }
133
if (ctx->tbflags & FPSCR_SZ) {
68
- op->args[3] = cond;
134
TCGv_i64 fp = tcg_temp_new_i64();
69
- op->opc = opc = (opc == INDEX_op_movcond_i32
135
gen_load_fpr64(ctx, fp, XHACK(B7_4));
70
- ? INDEX_op_setcond_i32
136
- tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEUQ);
71
- : INDEX_op_setcond_i64);
137
+ tcg_gen_qemu_st_i64(fp, addr, ctx->memidx,
72
- }
138
+ MO_TEUQ | MO_ALIGN);
73
- break;
139
     } else {
74
-
140
- tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
75
-
141
+ tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx,
76
default:
142
+ MO_TEUL | MO_ALIGN);
77
break;
143
     }
78
144
    }
79
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
145
    return;
80
case INDEX_op_mb:
146
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
81
done = fold_mb(&ctx, op);
147
    {
82
break;
148
     TCGv addr = tcg_temp_new();
83
+ CASE_OP_32_64(movcond):
149
     tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
84
+ done = fold_movcond(&ctx, op);
150
- tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW);
85
+ break;
151
+ tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESW | MO_ALIGN);
86
CASE_OP_32_64(mul):
152
    }
87
done = fold_mul(&ctx, op);
153
    return;
88
break;
154
case 0xc600:        /* mov.l @(disp,GBR),R0 */
155
    {
156
     TCGv addr = tcg_temp_new();
157
     tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
158
- tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL);
159
+ tcg_gen_qemu_ld_i32(REG(0), addr, ctx->memidx, MO_TESL | MO_ALIGN);
160
    }
161
    return;
162
case 0xc000:        /* mov.b R0,@(disp,GBR) */
163
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
164
    {
165
     TCGv addr = tcg_temp_new();
166
     tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 2);
167
- tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW);
168
+ tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUW | MO_ALIGN);
169
    }
170
    return;
171
case 0xc200:        /* mov.l R0,@(disp,GBR) */
172
    {
173
     TCGv addr = tcg_temp_new();
174
     tcg_gen_addi_i32(addr, cpu_gbr, B7_0 * 4);
175
- tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL);
176
+ tcg_gen_qemu_st_i32(REG(0), addr, ctx->memidx, MO_TEUL | MO_ALIGN);
177
    }
178
    return;
179
case 0x8000:        /* mov.b R0,@(disp,Rn) */
180
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
181
    return;
182
case 0x4087:        /* ldc.l @Rm+,Rn_BANK */
183
    CHECK_PRIVILEGED
184
- tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx, MO_TESL);
185
+ tcg_gen_qemu_ld_i32(ALTREG(B6_4), REG(B11_8), ctx->memidx,
186
+ MO_TESL | MO_ALIGN);
187
    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
188
    return;
189
case 0x0082:        /* stc Rm_BANK,Rn */
190
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
191
    {
192
     TCGv addr = tcg_temp_new();
193
     tcg_gen_subi_i32(addr, REG(B11_8), 4);
194
- tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx, MO_TEUL);
195
+ tcg_gen_qemu_st_i32(ALTREG(B6_4), addr, ctx->memidx,
196
+ MO_TEUL | MO_ALIGN);
197
     tcg_gen_mov_i32(REG(B11_8), addr);
198
    }
199
    return;
200
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
201
    CHECK_PRIVILEGED
202
    {
203
     TCGv val = tcg_temp_new();
204
- tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TESL);
205
+ tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx,
206
+ MO_TESL | MO_ALIGN);
207
tcg_gen_andi_i32(val, val, 0x700083f3);
208
gen_write_sr(val);
209
     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
210
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
211
TCGv val = tcg_temp_new();
212
     tcg_gen_subi_i32(addr, REG(B11_8), 4);
213
gen_read_sr(val);
214
- tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
215
+ tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN);
216
     tcg_gen_mov_i32(REG(B11_8), addr);
217
    }
218
    return;
219
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
220
return;                            \
221
case ldpnum:                            \
222
prechk                             \
223
- tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, MO_TESL); \
224
+ tcg_gen_qemu_ld_i32(cpu_##reg, REG(B11_8), ctx->memidx, \
225
+ MO_TESL | MO_ALIGN); \
226
tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);        \
227
return;
228
#define ST(reg,stnum,stpnum,prechk)        \
229
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
230
{                                \
231
    TCGv addr = tcg_temp_new();                \
232
    tcg_gen_subi_i32(addr, REG(B11_8), 4);            \
233
- tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, MO_TEUL); \
234
+ tcg_gen_qemu_st_i32(cpu_##reg, addr, ctx->memidx, \
235
+ MO_TEUL | MO_ALIGN); \
236
    tcg_gen_mov_i32(REG(B11_8), addr);            \
237
}                                \
238
return;
239
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
240
    CHECK_FPU_ENABLED
241
    {
242
     TCGv addr = tcg_temp_new();
243
- tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx, MO_TESL);
244
+ tcg_gen_qemu_ld_i32(addr, REG(B11_8), ctx->memidx,
245
+ MO_TESL | MO_ALIGN);
246
     tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
247
gen_helper_ld_fpscr(cpu_env, addr);
248
ctx->base.is_jmp = DISAS_STOP;
249
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
250
     tcg_gen_andi_i32(val, cpu_fpscr, 0x003fffff);
251
     addr = tcg_temp_new();
252
     tcg_gen_subi_i32(addr, REG(B11_8), 4);
253
- tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL);
254
+ tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL | MO_ALIGN);
255
     tcg_gen_mov_i32(REG(B11_8), addr);
256
    }
257
    return;
258
case 0x00c3:        /* movca.l R0,@Rm */
259
{
260
TCGv val = tcg_temp_new();
261
- tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TEUL);
262
+ tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx,
263
+ MO_TEUL | MO_ALIGN);
264
gen_helper_movcal(cpu_env, REG(B11_8), val);
265
- tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
266
+ tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx,
267
+ MO_TEUL | MO_ALIGN);
268
}
269
ctx->has_movcal = 1;
270
    return;
271
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
272
cpu_lock_addr, fail);
273
tmp = tcg_temp_new();
274
tcg_gen_atomic_cmpxchg_i32(tmp, REG(B11_8), cpu_lock_value,
275
- REG(0), ctx->memidx, MO_TEUL);
276
+ REG(0), ctx->memidx,
277
+ MO_TEUL | MO_ALIGN);
278
tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, tmp, cpu_lock_value);
279
} else {
280
tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_lock_addr, -1, fail);
281
- tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL);
282
+ tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx,
283
+ MO_TEUL | MO_ALIGN);
284
tcg_gen_movi_i32(cpu_sr_t, 1);
285
}
286
tcg_gen_br(done);
287
@@ -XXX,XX +XXX,XX @@ static void _decode_opc(DisasContext * ctx)
288
if ((tb_cflags(ctx->base.tb) & CF_PARALLEL)) {
289
TCGv tmp = tcg_temp_new();
290
tcg_gen_mov_i32(tmp, REG(B11_8));
291
- tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
292
+ tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
293
+ MO_TESL | MO_ALIGN);
294
tcg_gen_mov_i32(cpu_lock_value, REG(0));
295
tcg_gen_mov_i32(cpu_lock_addr, tmp);
296
} else {
297
- tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx, MO_TESL);
298
+ tcg_gen_qemu_ld_i32(REG(0), REG(B11_8), ctx->memidx,
299
+ MO_TESL | MO_ALIGN);
300
tcg_gen_movi_i32(cpu_lock_addr, 0);
301
}
302
return;
89
--
303
--
90
2.25.1
304
2.34.1
91
305
92
306
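The sh4 hunks above add an explicit MO_ALIGN to every guest load/store because the
implicit TARGET_ALIGNED_ONLY behaviour is removed later in this series.  As a rough,
standalone sketch (simplified constants, not QEMU's real MO_* encoding), the flag
reduces to the following check on the slow path:

    #include <stdint.h>
    #include <stdio.h>

    enum {
        MO_SIZE  = 0x3,      /* log2 of the access size: 0..3 -> 1..8 bytes */
        MO_ALIGN = 1 << 4,   /* require natural alignment for the access */
    };

    /* Returns nonzero when an aligned-only access would fault. */
    static int unaligned_trap(uint64_t addr, unsigned memop)
    {
        unsigned a_bits = (memop & MO_ALIGN) ? (memop & MO_SIZE) : 0;
        return (addr & ((1u << a_bits) - 1)) != 0;
    }

    int main(void)
    {
        printf("%d\n", unaligned_trap(0x1002, 2 | MO_ALIGN));  /* 1: 4-byte access, misaligned */
        printf("%d\n", unaligned_trap(0x1000, 2 | MO_ALIGN));  /* 0: aligned */
        return 0;
    }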
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
3
---
5
tcg/optimize.c | 37 +++++++++++++++++++++----------------
4
configs/targets/sh4-linux-user.mak | 1 -
6
1 file changed, 21 insertions(+), 16 deletions(-)
5
configs/targets/sh4-softmmu.mak | 1 -
6
configs/targets/sh4eb-linux-user.mak | 1 -
7
configs/targets/sh4eb-softmmu.mak | 1 -
8
4 files changed, 4 deletions(-)
7
9
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
diff --git a/configs/targets/sh4-linux-user.mak b/configs/targets/sh4-linux-user.mak
9
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
12
--- a/configs/targets/sh4-linux-user.mak
11
+++ b/tcg/optimize.c
13
+++ b/configs/targets/sh4-linux-user.mak
12
@@ -XXX,XX +XXX,XX @@ static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
14
@@ -XXX,XX +XXX,XX @@
13
return fold_const2(ctx, op);
15
TARGET_ARCH=sh4
14
}
16
TARGET_SYSTBL_ABI=common
15
17
TARGET_SYSTBL=syscall.tbl
16
+static bool fold_mulu2_i32(OptContext *ctx, TCGOp *op)
18
-TARGET_ALIGNED_ONLY=y
17
+{
19
TARGET_HAS_BFLT=y
18
+ if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
20
diff --git a/configs/targets/sh4-softmmu.mak b/configs/targets/sh4-softmmu.mak
19
+ uint32_t a = arg_info(op->args[2])->val;
21
index XXXXXXX..XXXXXXX 100644
20
+ uint32_t b = arg_info(op->args[3])->val;
22
--- a/configs/targets/sh4-softmmu.mak
21
+ uint64_t r = (uint64_t)a * b;
23
+++ b/configs/targets/sh4-softmmu.mak
22
+ TCGArg rl, rh;
24
@@ -1,2 +1 @@
23
+ TCGOp *op2 = tcg_op_insert_before(ctx->tcg, op, INDEX_op_mov_i32);
25
TARGET_ARCH=sh4
24
+
26
-TARGET_ALIGNED_ONLY=y
25
+ rl = op->args[0];
27
diff --git a/configs/targets/sh4eb-linux-user.mak b/configs/targets/sh4eb-linux-user.mak
26
+ rh = op->args[1];
28
index XXXXXXX..XXXXXXX 100644
27
+ tcg_opt_gen_movi(ctx, op, rl, (int32_t)r);
29
--- a/configs/targets/sh4eb-linux-user.mak
28
+ tcg_opt_gen_movi(ctx, op2, rh, (int32_t)(r >> 32));
30
+++ b/configs/targets/sh4eb-linux-user.mak
29
+ return true;
31
@@ -XXX,XX +XXX,XX @@
30
+ }
32
TARGET_ARCH=sh4
31
+ return false;
33
TARGET_SYSTBL_ABI=common
32
+}
34
TARGET_SYSTBL=syscall.tbl
33
+
35
-TARGET_ALIGNED_ONLY=y
34
static bool fold_nand(OptContext *ctx, TCGOp *op)
36
TARGET_BIG_ENDIAN=y
35
{
37
TARGET_HAS_BFLT=y
36
return fold_const2(ctx, op);
38
diff --git a/configs/targets/sh4eb-softmmu.mak b/configs/targets/sh4eb-softmmu.mak
37
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
39
index XXXXXXX..XXXXXXX 100644
38
}
40
--- a/configs/targets/sh4eb-softmmu.mak
39
break;
41
+++ b/configs/targets/sh4eb-softmmu.mak
40
42
@@ -XXX,XX +XXX,XX @@
41
- case INDEX_op_mulu2_i32:
43
TARGET_ARCH=sh4
42
- if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
44
-TARGET_ALIGNED_ONLY=y
43
- uint32_t a = arg_info(op->args[2])->val;
45
TARGET_BIG_ENDIAN=y
44
- uint32_t b = arg_info(op->args[3])->val;
45
- uint64_t r = (uint64_t)a * b;
46
- TCGArg rl, rh;
47
- TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_mov_i32);
48
-
49
- rl = op->args[0];
50
- rh = op->args[1];
51
- tcg_opt_gen_movi(&ctx, op, rl, (int32_t)r);
52
- tcg_opt_gen_movi(&ctx, op2, rh, (int32_t)(r >> 32));
53
- continue;
54
- }
55
- break;
56
-
57
default:
58
break;
59
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
61
CASE_OP_32_64(muluh):
62
done = fold_mul_highpart(&ctx, op);
63
break;
64
+ case INDEX_op_mulu2_i32:
65
+ done = fold_mulu2_i32(&ctx, op);
66
+ break;
67
CASE_OP_32_64(nand):
68
done = fold_nand(&ctx, op);
69
break;
70
--
46
--
71
2.25.1
47
2.34.1
72
48
73
49
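The tcg/optimize.c hunks interleaved above fold INDEX_op_mulu2_i32 when both inputs
are known constants.  A standalone sketch of that arithmetic (plain C, no TCG types):

    #include <stdint.h>
    #include <stdio.h>

    /* Split the full 64-bit product of two known 32-bit operands into the
     * low/high destination halves, exactly as the folded movi pair does. */
    static void mulu2_i32_const(uint32_t a, uint32_t b, uint32_t *lo, uint32_t *hi)
    {
        uint64_t r = (uint64_t)a * b;
        *lo = (uint32_t)r;
        *hi = (uint32_t)(r >> 32);
    }

    int main(void)
    {
        uint32_t lo, hi;
        mulu2_i32_const(0xffffffffu, 2, &lo, &hi);
        printf("hi=0x%x lo=0x%x\n", hi, lo);   /* hi=0x1 lo=0xfffffffe */
        return 0;
    }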
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
All uses have now been expunged.
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
3
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
5
---
5
tcg/optimize.c | 23 ++++++++++++++---------
6
include/exec/memop.h | 13 ++-----------
6
1 file changed, 14 insertions(+), 9 deletions(-)
7
include/exec/poison.h | 1 -
8
tcg/tcg.c | 5 -----
9
3 files changed, 2 insertions(+), 17 deletions(-)
7
10
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/include/exec/memop.h b/include/exec/memop.h
9
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
13
--- a/include/exec/memop.h
11
+++ b/tcg/optimize.c
14
+++ b/include/exec/memop.h
12
@@ -XXX,XX +XXX,XX @@ static bool fold_remainder(OptContext *ctx, TCGOp *op)
15
@@ -XXX,XX +XXX,XX @@ typedef enum MemOp {
13
return fold_const2(ctx, op);
16
* MO_UNALN accesses are never checked for alignment.
14
}
17
* MO_ALIGN accesses will result in a call to the CPU's
15
18
* do_unaligned_access hook if the guest address is not aligned.
16
+static bool fold_setcond(OptContext *ctx, TCGOp *op)
19
- * The default depends on whether the target CPU defines
17
+{
20
- * TARGET_ALIGNED_ONLY.
18
+ TCGCond cond = op->args[3];
21
*
19
+ int i = do_constant_folding_cond(op->opc, op->args[1], op->args[2], cond);
22
* Some architectures (e.g. ARMv8) need the address which is aligned
20
+
23
* to a size more than the size of the memory access.
21
+ if (i >= 0) {
24
@@ -XXX,XX +XXX,XX @@ typedef enum MemOp {
22
+ return tcg_opt_gen_movi(ctx, op, op->args[0], i);
25
*/
23
+ }
26
MO_ASHIFT = 5,
24
+ return false;
27
MO_AMASK = 0x7 << MO_ASHIFT,
25
+}
28
-#ifdef NEED_CPU_H
26
+
29
-#ifdef TARGET_ALIGNED_ONLY
27
static bool fold_setcond2(OptContext *ctx, TCGOp *op)
30
- MO_ALIGN = 0,
28
{
31
- MO_UNALN = MO_AMASK,
29
TCGCond cond = op->args[5];
32
-#else
30
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
33
- MO_ALIGN = MO_AMASK,
31
}
34
- MO_UNALN = 0,
32
break;
35
-#endif
33
36
-#endif
34
- CASE_OP_32_64(setcond):
37
+ MO_UNALN = 0,
35
- i = do_constant_folding_cond(opc, op->args[1],
38
MO_ALIGN_2 = 1 << MO_ASHIFT,
36
- op->args[2], op->args[3]);
39
MO_ALIGN_4 = 2 << MO_ASHIFT,
37
- if (i >= 0) {
40
MO_ALIGN_8 = 3 << MO_ASHIFT,
38
- tcg_opt_gen_movi(&ctx, op, op->args[0], i);
41
MO_ALIGN_16 = 4 << MO_ASHIFT,
39
- continue;
42
MO_ALIGN_32 = 5 << MO_ASHIFT,
40
- }
43
MO_ALIGN_64 = 6 << MO_ASHIFT,
41
- break;
44
+ MO_ALIGN = MO_AMASK,
42
-
45
43
CASE_OP_32_64(movcond):
46
/* Combinations of the above, for ease of use. */
44
i = do_constant_folding_cond(opc, op->args[1],
47
MO_UB = MO_8,
45
op->args[2], op->args[5]);
48
diff --git a/include/exec/poison.h b/include/exec/poison.h
46
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
49
index XXXXXXX..XXXXXXX 100644
47
CASE_OP_32_64(shr):
50
--- a/include/exec/poison.h
48
done = fold_shift(&ctx, op);
51
+++ b/include/exec/poison.h
49
break;
52
@@ -XXX,XX +XXX,XX @@
50
+ CASE_OP_32_64(setcond):
53
#pragma GCC poison TARGET_TRICORE
51
+ done = fold_setcond(&ctx, op);
54
#pragma GCC poison TARGET_XTENSA
52
+ break;
55
53
case INDEX_op_setcond2_i32:
56
-#pragma GCC poison TARGET_ALIGNED_ONLY
54
done = fold_setcond2(&ctx, op);
57
#pragma GCC poison TARGET_HAS_BFLT
55
break;
58
#pragma GCC poison TARGET_NAME
59
#pragma GCC poison TARGET_SUPPORTS_MTTCG
60
diff --git a/tcg/tcg.c b/tcg/tcg.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/tcg/tcg.c
63
+++ b/tcg/tcg.c
64
@@ -XXX,XX +XXX,XX @@ static const char * const ldst_name[] =
65
};
66
67
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
68
-#ifdef TARGET_ALIGNED_ONLY
69
[MO_UNALN >> MO_ASHIFT] = "un+",
70
- [MO_ALIGN >> MO_ASHIFT] = "",
71
-#else
72
- [MO_UNALN >> MO_ASHIFT] = "",
73
[MO_ALIGN >> MO_ASHIFT] = "al+",
74
-#endif
75
[MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
76
[MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
77
[MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
56
--
78
--
57
2.25.1
79
2.34.1
58
80
59
81
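One detail of the patch above: dropping the "#pragma GCC poison TARGET_ALIGNED_ONLY"
line is only safe because the macro no longer exists anywhere.  A minimal standalone
reminder of what poisoning does (the identifier name here is made up):

    /* After this pragma, any use of the identifier is a hard compile error,
     * which is how common code keeps target-specific macros from leaking in. */
    #pragma GCC poison TARGET_EXAMPLE_ONLY

    int main(void)
    {
        /* int x = TARGET_EXAMPLE_ONLY;   <- error: attempt to use poisoned identifier */
        return 0;
    }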
1
Continue splitting tcg_optimize.
1
Like cpu_in_exclusive_context, but also true if
2
there is no other cpu against which we could race.
3
4
Use it in tb_flush as a direct replacement.
5
Use it in cpu_loop_exit_atomic to ensure that there
6
is no loop against cpu_exec_step_atomic.
2
7
3
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
8
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
9
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
10
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
11
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
12
---
8
tcg/optimize.c | 22 ++++++++++++++--------
13
accel/tcg/internal.h | 9 +++++++++
9
1 file changed, 14 insertions(+), 8 deletions(-)
14
accel/tcg/cpu-exec-common.c | 3 +++
15
accel/tcg/tb-maint.c | 2 +-
16
3 files changed, 13 insertions(+), 1 deletion(-)
10
17
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
18
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
12
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
20
--- a/accel/tcg/internal.h
14
+++ b/tcg/optimize.c
21
+++ b/accel/tcg/internal.h
15
@@ -XXX,XX +XXX,XX @@ static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
22
@@ -XXX,XX +XXX,XX @@ static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
16
}
23
}
17
}
24
}
18
25
19
+static void copy_propagate(OptContext *ctx, TCGOp *op,
26
+/*
20
+ int nb_oargs, int nb_iargs)
27
+ * Return true if CS is not running in parallel with other cpus, either
28
+ * because there are no other cpus or we are within an exclusive context.
29
+ */
30
+static inline bool cpu_in_serial_context(CPUState *cs)
21
+{
31
+{
22
+ TCGContext *s = ctx->tcg;
32
+ return !(cs->tcg_cflags & CF_PARALLEL) || cpu_in_exclusive_context(cs);
23
+
24
+ for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
25
+ TCGTemp *ts = arg_temp(op->args[i]);
26
+ if (ts && ts_is_copy(ts)) {
27
+ op->args[i] = temp_arg(find_better_copy(s, ts));
28
+ }
29
+ }
30
+}
33
+}
31
+
34
+
32
/* Propagate constants and copies, fold constant expressions. */
35
extern int64_t max_delay;
33
void tcg_optimize(TCGContext *s)
36
extern int64_t max_advance;
37
38
diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c
39
index XXXXXXX..XXXXXXX 100644
40
--- a/accel/tcg/cpu-exec-common.c
41
+++ b/accel/tcg/cpu-exec-common.c
42
@@ -XXX,XX +XXX,XX @@
43
#include "sysemu/tcg.h"
44
#include "exec/exec-all.h"
45
#include "qemu/plugin.h"
46
+#include "internal.h"
47
48
bool tcg_allowed;
49
50
@@ -XXX,XX +XXX,XX @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
51
52
void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
34
{
53
{
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
54
+ /* Prevent looping if already executing in a serial context. */
36
nb_iargs = def->nb_iargs;
55
+ g_assert(!cpu_in_serial_context(cpu));
37
}
56
cpu->exception_index = EXCP_ATOMIC;
38
init_arguments(&ctx, op, nb_oargs + nb_iargs);
57
cpu_loop_exit_restore(cpu, pc);
39
-
58
}
40
- /* Do copy propagation */
59
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
41
- for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
60
index XXXXXXX..XXXXXXX 100644
42
- TCGTemp *ts = arg_temp(op->args[i]);
61
--- a/accel/tcg/tb-maint.c
43
- if (ts && ts_is_copy(ts)) {
62
+++ b/accel/tcg/tb-maint.c
44
- op->args[i] = temp_arg(find_better_copy(s, ts));
63
@@ -XXX,XX +XXX,XX @@ void tb_flush(CPUState *cpu)
45
- }
64
if (tcg_enabled()) {
46
- }
65
unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
47
+ copy_propagate(&ctx, op, nb_oargs, nb_iargs);
66
48
67
- if (cpu_in_exclusive_context(cpu)) {
49
/* For commutative operations make constant second argument */
68
+ if (cpu_in_serial_context(cpu)) {
50
switch (opc) {
69
do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
70
} else {
71
async_safe_run_on_cpu(cpu, do_tb_flush,
51
--
72
--
52
2.25.1
73
2.34.1
53
74
54
75
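A hypothetical caller of the new helper, to show the intended pattern; the function
names below are invented for illustration, only cpu_in_serial_context() and
async_safe_run_on_cpu() are real:

    /* Sketch: maintenance work that must not race with other vCPUs can run
     * inline when no race is possible, mirroring the tb_flush() change above. */
    static void flush_my_table(CPUState *cpu)
    {
        if (cpu_in_serial_context(cpu)) {
            do_flush_my_table(cpu, RUN_ON_CPU_NULL);      /* nobody can race */
        } else {
            async_safe_run_on_cpu(cpu, do_flush_my_table,
                                  RUN_ON_CPU_NULL);       /* defer to a safe point */
        }
    }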
1
There was no real reason for calls to have separate code here.
Unify init for calls vs non-calls using the call path, which
handles TCG_CALL_DUMMY_ARG.

Instead of playing with offsetof in various places, use
MMUAccessType to index an array. This is easily defined
instead of the previous dummy padding array in the union.
4
4
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
6
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
9
---
9
---
10
tcg/optimize.c | 25 +++++++++++--------------
10
include/exec/cpu-defs.h | 7 ++-
11
1 file changed, 11 insertions(+), 14 deletions(-)
11
include/exec/cpu_ldst.h | 26 ++++++++--
12
accel/tcg/cputlb.c | 104 +++++++++++++---------------------------
13
3 files changed, 59 insertions(+), 78 deletions(-)
12
14
13
diff --git a/tcg/optimize.c b/tcg/optimize.c
15
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/tcg/optimize.c
17
--- a/include/exec/cpu-defs.h
16
+++ b/tcg/optimize.c
18
+++ b/include/exec/cpu-defs.h
17
@@ -XXX,XX +XXX,XX @@ static void init_ts_info(OptContext *ctx, TCGTemp *ts)
19
@@ -XXX,XX +XXX,XX @@ typedef struct CPUTLBEntry {
20
use the corresponding iotlb value. */
21
uintptr_t addend;
22
};
23
- /* padding to get a power of two size */
24
- uint8_t dummy[1 << CPU_TLB_ENTRY_BITS];
25
+ /*
26
+ * Padding to get a power of two size, as well as index
27
+ * access to addr_{read,write,code}.
28
+ */
29
+ target_ulong addr_idx[(1 << CPU_TLB_ENTRY_BITS) / TARGET_LONG_SIZE];
30
};
31
} CPUTLBEntry;
32
33
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/include/exec/cpu_ldst.h
36
+++ b/include/exec/cpu_ldst.h
37
@@ -XXX,XX +XXX,XX @@ static inline void clear_helper_retaddr(void)
38
/* Needed for TCG_OVERSIZED_GUEST */
39
#include "tcg/tcg.h"
40
41
+static inline target_ulong tlb_read_idx(const CPUTLBEntry *entry,
42
+ MMUAccessType access_type)
43
+{
44
+ /* Do not rearrange the CPUTLBEntry structure members. */
45
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) !=
46
+ MMU_DATA_LOAD * TARGET_LONG_SIZE);
47
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
48
+ MMU_DATA_STORE * TARGET_LONG_SIZE);
49
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
50
+ MMU_INST_FETCH * TARGET_LONG_SIZE);
51
+
52
+ const target_ulong *ptr = &entry->addr_idx[access_type];
53
+#if TCG_OVERSIZED_GUEST
54
+ return *ptr;
55
+#else
56
+ /* ofs might correspond to .addr_write, so use qatomic_read */
57
+ return qatomic_read(ptr);
58
+#endif
59
+}
60
+
61
static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
62
{
63
-#if TCG_OVERSIZED_GUEST
64
- return entry->addr_write;
65
-#else
66
- return qatomic_read(&entry->addr_write);
67
-#endif
68
+ return tlb_read_idx(entry, MMU_DATA_STORE);
69
}
70
71
/* Find the TLB index corresponding to the mmu_idx + address pair. */
72
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
73
index XXXXXXX..XXXXXXX 100644
74
--- a/accel/tcg/cputlb.c
75
+++ b/accel/tcg/cputlb.c
76
@@ -XXX,XX +XXX,XX @@ static void io_writex(CPUArchState *env, CPUTLBEntryFull *full,
18
}
77
}
19
}
78
}
20
79
21
-static void init_arg_info(OptContext *ctx, TCGArg arg)
80
-static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
22
-{
81
-{
23
- init_ts_info(ctx, arg_temp(arg));
82
-#if TCG_OVERSIZED_GUEST
83
- return *(target_ulong *)((uintptr_t)entry + ofs);
84
-#else
85
- /* ofs might correspond to .addr_write, so use qatomic_read */
86
- return qatomic_read((target_ulong *)((uintptr_t)entry + ofs));
87
-#endif
24
-}
88
-}
25
-
89
-
26
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
90
/* Return true if ADDR is present in the victim tlb, and has been copied
27
{
91
back to the main tlb. */
28
TCGTemp *i, *g, *l;
92
static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
29
@@ -XXX,XX +XXX,XX @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
93
- size_t elt_ofs, target_ulong page)
94
+ MMUAccessType access_type, target_ulong page)
95
{
96
size_t vidx;
97
98
assert_cpu_is_self(env_cpu(env));
99
for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
100
CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
101
- target_ulong cmp;
102
-
103
- /* elt_ofs might correspond to .addr_write, so use qatomic_read */
104
-#if TCG_OVERSIZED_GUEST
105
- cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
106
-#else
107
- cmp = qatomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
108
-#endif
109
+ target_ulong cmp = tlb_read_idx(vtlb, access_type);
110
111
if (cmp == page) {
112
/* Found entry in victim tlb, swap tlb and iotlb. */
113
@@ -XXX,XX +XXX,XX @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
30
return false;
114
return false;
31
}
115
}
32
116
33
+static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
117
-/* Macro to call the above, with local variables from the use context. */
34
+{
118
-#define VICTIM_TLB_HIT(TY, ADDR) \
35
+ for (int i = 0; i < nb_args; i++) {
119
- victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
36
+ TCGTemp *ts = arg_temp(op->args[i]);
120
- (ADDR) & TARGET_PAGE_MASK)
37
+ if (ts) {
121
-
38
+ init_ts_info(ctx, ts);
122
static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
39
+ }
123
CPUTLBEntryFull *full, uintptr_t retaddr)
40
+ }
124
{
41
+}
125
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
42
+
126
{
43
/* Propagate constants and copies, fold constant expressions. */
127
uintptr_t index = tlb_index(env, mmu_idx, addr);
44
void tcg_optimize(TCGContext *s)
128
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
45
{
129
- target_ulong tlb_addr, page_addr;
46
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
130
- size_t elt_ofs;
47
if (opc == INDEX_op_call) {
131
- int flags;
48
nb_oargs = TCGOP_CALLO(op);
132
+ target_ulong tlb_addr = tlb_read_idx(entry, access_type);
49
nb_iargs = TCGOP_CALLI(op);
133
+ target_ulong page_addr = addr & TARGET_PAGE_MASK;
50
- for (i = 0; i < nb_oargs + nb_iargs; i++) {
134
+ int flags = TLB_FLAGS_MASK;
51
- TCGTemp *ts = arg_temp(op->args[i]);
135
52
- if (ts) {
136
- switch (access_type) {
53
- init_ts_info(&ctx, ts);
137
- case MMU_DATA_LOAD:
54
- }
138
- elt_ofs = offsetof(CPUTLBEntry, addr_read);
55
- }
139
- break;
56
} else {
140
- case MMU_DATA_STORE:
57
nb_oargs = def->nb_oargs;
141
- elt_ofs = offsetof(CPUTLBEntry, addr_write);
58
nb_iargs = def->nb_iargs;
142
- break;
59
- for (i = 0; i < nb_oargs + nb_iargs; i++) {
143
- case MMU_INST_FETCH:
60
- init_arg_info(&ctx, op->args[i]);
144
- elt_ofs = offsetof(CPUTLBEntry, addr_code);
61
- }
145
- break;
146
- default:
147
- g_assert_not_reached();
148
- }
149
- tlb_addr = tlb_read_ofs(entry, elt_ofs);
150
-
151
- flags = TLB_FLAGS_MASK;
152
- page_addr = addr & TARGET_PAGE_MASK;
153
if (!tlb_hit_page(tlb_addr, page_addr)) {
154
- if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
155
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type, page_addr)) {
156
CPUState *cs = env_cpu(env);
157
158
if (!cs->cc->tcg_ops->tlb_fill(cs, addr, fault_size, access_type,
159
@@ -XXX,XX +XXX,XX @@ static int probe_access_internal(CPUArchState *env, target_ulong addr,
160
*/
161
flags &= ~TLB_INVALID_MASK;
62
}
162
}
63
+ init_arguments(&ctx, op, nb_oargs + nb_iargs);
163
- tlb_addr = tlb_read_ofs(entry, elt_ofs);
64
164
+ tlb_addr = tlb_read_idx(entry, access_type);
65
/* Do copy propagation */
165
}
66
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
166
flags &= tlb_addr;
167
168
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
169
if (prot & PAGE_WRITE) {
170
tlb_addr = tlb_addr_write(tlbe);
171
if (!tlb_hit(tlb_addr, addr)) {
172
- if (!VICTIM_TLB_HIT(addr_write, addr)) {
173
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
174
+ addr & TARGET_PAGE_MASK)) {
175
tlb_fill(env_cpu(env), addr, size,
176
MMU_DATA_STORE, mmu_idx, retaddr);
177
index = tlb_index(env, mmu_idx, addr);
178
@@ -XXX,XX +XXX,XX @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
179
} else /* if (prot & PAGE_READ) */ {
180
tlb_addr = tlbe->addr_read;
181
if (!tlb_hit(tlb_addr, addr)) {
182
- if (!VICTIM_TLB_HIT(addr_read, addr)) {
183
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_LOAD,
184
+ addr & TARGET_PAGE_MASK)) {
185
tlb_fill(env_cpu(env), addr, size,
186
MMU_DATA_LOAD, mmu_idx, retaddr);
187
index = tlb_index(env, mmu_idx, addr);
188
@@ -XXX,XX +XXX,XX @@ load_memop(const void *haddr, MemOp op)
189
190
static inline uint64_t QEMU_ALWAYS_INLINE
191
load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
192
- uintptr_t retaddr, MemOp op, bool code_read,
193
+ uintptr_t retaddr, MemOp op, MMUAccessType access_type,
194
FullLoadHelper *full_load)
195
{
196
- const size_t tlb_off = code_read ?
197
- offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
198
- const MMUAccessType access_type =
199
- code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
200
const unsigned a_bits = get_alignment_bits(get_memop(oi));
201
const size_t size = memop_size(op);
202
uintptr_t mmu_idx = get_mmuidx(oi);
203
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
204
205
index = tlb_index(env, mmu_idx, addr);
206
entry = tlb_entry(env, mmu_idx, addr);
207
- tlb_addr = code_read ? entry->addr_code : entry->addr_read;
208
+ tlb_addr = tlb_read_idx(entry, access_type);
209
210
/* If the TLB entry is for a different page, reload and try again. */
211
if (!tlb_hit(tlb_addr, addr)) {
212
- if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
213
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type,
214
addr & TARGET_PAGE_MASK)) {
215
tlb_fill(env_cpu(env), addr, size,
216
access_type, mmu_idx, retaddr);
217
index = tlb_index(env, mmu_idx, addr);
218
entry = tlb_entry(env, mmu_idx, addr);
219
}
220
- tlb_addr = code_read ? entry->addr_code : entry->addr_read;
221
+ tlb_addr = tlb_read_idx(entry, access_type);
222
tlb_addr &= ~TLB_INVALID_MASK;
223
}
224
225
@@ -XXX,XX +XXX,XX @@ static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
226
MemOpIdx oi, uintptr_t retaddr)
227
{
228
validate_memop(oi, MO_UB);
229
- return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
230
+ return load_helper(env, addr, oi, retaddr, MO_UB, MMU_DATA_LOAD,
231
+ full_ldub_mmu);
232
}
233
234
tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
235
@@ -XXX,XX +XXX,XX @@ static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
236
MemOpIdx oi, uintptr_t retaddr)
237
{
238
validate_memop(oi, MO_LEUW);
239
- return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
240
+ return load_helper(env, addr, oi, retaddr, MO_LEUW, MMU_DATA_LOAD,
241
full_le_lduw_mmu);
242
}
243
244
@@ -XXX,XX +XXX,XX @@ static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
245
MemOpIdx oi, uintptr_t retaddr)
246
{
247
validate_memop(oi, MO_BEUW);
248
- return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
249
+ return load_helper(env, addr, oi, retaddr, MO_BEUW, MMU_DATA_LOAD,
250
full_be_lduw_mmu);
251
}
252
253
@@ -XXX,XX +XXX,XX @@ static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
254
MemOpIdx oi, uintptr_t retaddr)
255
{
256
validate_memop(oi, MO_LEUL);
257
- return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
258
+ return load_helper(env, addr, oi, retaddr, MO_LEUL, MMU_DATA_LOAD,
259
full_le_ldul_mmu);
260
}
261
262
@@ -XXX,XX +XXX,XX @@ static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
263
MemOpIdx oi, uintptr_t retaddr)
264
{
265
validate_memop(oi, MO_BEUL);
266
- return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
267
+ return load_helper(env, addr, oi, retaddr, MO_BEUL, MMU_DATA_LOAD,
268
full_be_ldul_mmu);
269
}
270
271
@@ -XXX,XX +XXX,XX @@ uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
272
MemOpIdx oi, uintptr_t retaddr)
273
{
274
validate_memop(oi, MO_LEUQ);
275
- return load_helper(env, addr, oi, retaddr, MO_LEUQ, false,
276
+ return load_helper(env, addr, oi, retaddr, MO_LEUQ, MMU_DATA_LOAD,
277
helper_le_ldq_mmu);
278
}
279
280
@@ -XXX,XX +XXX,XX @@ uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
281
MemOpIdx oi, uintptr_t retaddr)
282
{
283
validate_memop(oi, MO_BEUQ);
284
- return load_helper(env, addr, oi, retaddr, MO_BEUQ, false,
285
+ return load_helper(env, addr, oi, retaddr, MO_BEUQ, MMU_DATA_LOAD,
286
helper_be_ldq_mmu);
287
}
288
289
@@ -XXX,XX +XXX,XX @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
290
uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
291
bool big_endian)
292
{
293
- const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
294
uintptr_t index, index2;
295
CPUTLBEntry *entry, *entry2;
296
target_ulong page1, page2, tlb_addr, tlb_addr2;
297
@@ -XXX,XX +XXX,XX @@ store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
298
299
tlb_addr2 = tlb_addr_write(entry2);
300
if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) {
301
- if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
302
+ if (!victim_tlb_hit(env, mmu_idx, index2, MMU_DATA_STORE, page2)) {
303
tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
304
mmu_idx, retaddr);
305
index2 = tlb_index(env, mmu_idx, page2);
306
@@ -XXX,XX +XXX,XX @@ static inline void QEMU_ALWAYS_INLINE
307
store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
308
MemOpIdx oi, uintptr_t retaddr, MemOp op)
309
{
310
- const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
311
const unsigned a_bits = get_alignment_bits(get_memop(oi));
312
const size_t size = memop_size(op);
313
uintptr_t mmu_idx = get_mmuidx(oi);
314
@@ -XXX,XX +XXX,XX @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
315
316
/* If the TLB entry is for a different page, reload and try again. */
317
if (!tlb_hit(tlb_addr, addr)) {
318
- if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
319
+ if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
320
addr & TARGET_PAGE_MASK)) {
321
tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
322
mmu_idx, retaddr);
323
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
324
static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
325
MemOpIdx oi, uintptr_t retaddr)
326
{
327
- return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_code);
328
+ return load_helper(env, addr, oi, retaddr, MO_8,
329
+ MMU_INST_FETCH, full_ldub_code);
330
}
331
332
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
333
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
334
static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
335
MemOpIdx oi, uintptr_t retaddr)
336
{
337
- return load_helper(env, addr, oi, retaddr, MO_TEUW, true, full_lduw_code);
338
+ return load_helper(env, addr, oi, retaddr, MO_TEUW,
339
+ MMU_INST_FETCH, full_lduw_code);
340
}
341
342
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
343
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
344
static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
345
MemOpIdx oi, uintptr_t retaddr)
346
{
347
- return load_helper(env, addr, oi, retaddr, MO_TEUL, true, full_ldl_code);
348
+ return load_helper(env, addr, oi, retaddr, MO_TEUL,
349
+ MMU_INST_FETCH, full_ldl_code);
350
}
351
352
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
353
@@ -XXX,XX +XXX,XX @@ uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
354
static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
355
MemOpIdx oi, uintptr_t retaddr)
356
{
357
- return load_helper(env, addr, oi, retaddr, MO_TEUQ, true, full_ldq_code);
358
+ return load_helper(env, addr, oi, retaddr, MO_TEUQ,
359
+ MMU_INST_FETCH, full_ldq_code);
360
}
361
362
uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
67
--
363
--
68
2.25.1
364
2.34.1
69
365
70
366
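The core trick of the cputlb patch above, reduced to a standalone example: expose
same-typed struct fields as an array indexed by an access-type enum, and pin the
layout with compile-time asserts (names are illustrative, not QEMU's):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef enum { ACC_LOAD = 0, ACC_STORE = 1, ACC_FETCH = 2 } AccType;

    typedef union {
        struct {
            uint64_t addr_read;
            uint64_t addr_write;
            uint64_t addr_code;
        };
        uint64_t addr_idx[3];              /* indexed with an AccType value */
    } TlbEntryLike;

    static_assert(offsetof(TlbEntryLike, addr_read)  == ACC_LOAD  * sizeof(uint64_t), "layout");
    static_assert(offsetof(TlbEntryLike, addr_write) == ACC_STORE * sizeof(uint64_t), "layout");
    static_assert(offsetof(TlbEntryLike, addr_code)  == ACC_FETCH * sizeof(uint64_t), "layout");

    /* One accessor replaces a switch over offsetof() values at every call site. */
    static uint64_t read_idx(const TlbEntryLike *e, AccType t)
    {
        return e->addr_idx[t];
    }

    int main(void)
    {
        TlbEntryLike e = { .addr_read = 1, .addr_write = 2, .addr_code = 3 };
        printf("%llu\n", (unsigned long long)read_idx(&e, ACC_STORE));   /* 2 */
        return 0;
    }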
Deleted patch
1
Return -1 instead of 2 for failure, so that we can
2
use comparisons against 0 for all cases.
3
1
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
tcg/optimize.c | 145 +++++++++++++++++++++++++------------------------
9
1 file changed, 74 insertions(+), 71 deletions(-)
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
+++ b/tcg/optimize.c
15
@@ -XXX,XX +XXX,XX @@ static bool do_constant_folding_cond_eq(TCGCond c)
16
}
17
}
18
19
-/* Return 2 if the condition can't be simplified, and the result
20
- of the condition (0 or 1) if it can */
21
-static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
22
- TCGArg y, TCGCond c)
23
+/*
24
+ * Return -1 if the condition can't be simplified,
25
+ * and the result of the condition (0 or 1) if it can.
26
+ */
27
+static int do_constant_folding_cond(TCGOpcode op, TCGArg x,
28
+ TCGArg y, TCGCond c)
29
{
30
uint64_t xv = arg_info(x)->val;
31
uint64_t yv = arg_info(y)->val;
32
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
33
case TCG_COND_GEU:
34
return 1;
35
default:
36
- return 2;
37
+ return -1;
38
}
39
}
40
- return 2;
41
+ return -1;
42
}
43
44
-/* Return 2 if the condition can't be simplified, and the result
45
- of the condition (0 or 1) if it can */
46
-static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
47
+/*
48
+ * Return -1 if the condition can't be simplified,
49
+ * and the result of the condition (0 or 1) if it can.
50
+ */
51
+static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
52
{
53
TCGArg al = p1[0], ah = p1[1];
54
TCGArg bl = p2[0], bh = p2[1];
55
@@ -XXX,XX +XXX,XX @@ static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
56
if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
57
return do_constant_folding_cond_eq(c);
58
}
59
- return 2;
60
+ return -1;
61
}
62
63
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
65
break;
66
67
CASE_OP_32_64(setcond):
68
- tmp = do_constant_folding_cond(opc, op->args[1],
69
- op->args[2], op->args[3]);
70
- if (tmp != 2) {
71
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
72
+ i = do_constant_folding_cond(opc, op->args[1],
73
+ op->args[2], op->args[3]);
74
+ if (i >= 0) {
75
+ tcg_opt_gen_movi(&ctx, op, op->args[0], i);
76
continue;
77
}
78
break;
79
80
CASE_OP_32_64(brcond):
81
- tmp = do_constant_folding_cond(opc, op->args[0],
82
- op->args[1], op->args[2]);
83
- switch (tmp) {
84
- case 0:
85
+ i = do_constant_folding_cond(opc, op->args[0],
86
+ op->args[1], op->args[2]);
87
+ if (i == 0) {
88
tcg_op_remove(s, op);
89
continue;
90
- case 1:
91
+ } else if (i > 0) {
92
memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
93
op->opc = opc = INDEX_op_br;
94
op->args[0] = op->args[3];
95
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
96
break;
97
98
CASE_OP_32_64(movcond):
99
- tmp = do_constant_folding_cond(opc, op->args[1],
100
- op->args[2], op->args[5]);
101
- if (tmp != 2) {
102
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4-tmp]);
103
+ i = do_constant_folding_cond(opc, op->args[1],
104
+ op->args[2], op->args[5]);
105
+ if (i >= 0) {
106
+ tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[4 - i]);
107
continue;
108
}
109
if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
110
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
111
break;
112
113
case INDEX_op_brcond2_i32:
114
- tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
115
- op->args[4]);
116
- if (tmp == 0) {
117
+ i = do_constant_folding_cond2(&op->args[0], &op->args[2],
118
+ op->args[4]);
119
+ if (i == 0) {
120
do_brcond_false:
121
tcg_op_remove(s, op);
122
continue;
123
}
124
- if (tmp == 1) {
125
+ if (i > 0) {
126
do_brcond_true:
127
op->opc = opc = INDEX_op_br;
128
op->args[0] = op->args[5];
129
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
130
if (op->args[4] == TCG_COND_EQ) {
131
/* Simplify EQ comparisons where one of the pairs
132
can be simplified. */
133
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
134
- op->args[0], op->args[2],
135
- TCG_COND_EQ);
136
- if (tmp == 0) {
137
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
138
+ op->args[0], op->args[2],
139
+ TCG_COND_EQ);
140
+ if (i == 0) {
141
goto do_brcond_false;
142
- } else if (tmp == 1) {
143
+ } else if (i > 0) {
144
goto do_brcond_high;
145
}
146
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
147
- op->args[1], op->args[3],
148
- TCG_COND_EQ);
149
- if (tmp == 0) {
150
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
151
+ op->args[1], op->args[3],
152
+ TCG_COND_EQ);
153
+ if (i == 0) {
154
goto do_brcond_false;
155
- } else if (tmp != 1) {
156
+ } else if (i < 0) {
157
break;
158
}
159
do_brcond_low:
160
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
161
if (op->args[4] == TCG_COND_NE) {
162
/* Simplify NE comparisons where one of the pairs
163
can be simplified. */
164
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
165
- op->args[0], op->args[2],
166
- TCG_COND_NE);
167
- if (tmp == 0) {
168
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
169
+ op->args[0], op->args[2],
170
+ TCG_COND_NE);
171
+ if (i == 0) {
172
goto do_brcond_high;
173
- } else if (tmp == 1) {
174
+ } else if (i > 0) {
175
goto do_brcond_true;
176
}
177
- tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
178
- op->args[1], op->args[3],
179
- TCG_COND_NE);
180
- if (tmp == 0) {
181
+ i = do_constant_folding_cond(INDEX_op_brcond_i32,
182
+ op->args[1], op->args[3],
183
+ TCG_COND_NE);
184
+ if (i == 0) {
185
goto do_brcond_low;
186
- } else if (tmp == 1) {
187
+ } else if (i > 0) {
188
goto do_brcond_true;
189
}
190
}
191
break;
192
193
case INDEX_op_setcond2_i32:
194
- tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
195
- op->args[5]);
196
- if (tmp != 2) {
197
+ i = do_constant_folding_cond2(&op->args[1], &op->args[3],
198
+ op->args[5]);
199
+ if (i >= 0) {
200
do_setcond_const:
201
- tcg_opt_gen_movi(&ctx, op, op->args[0], tmp);
202
+ tcg_opt_gen_movi(&ctx, op, op->args[0], i);
203
continue;
204
}
205
if ((op->args[5] == TCG_COND_LT || op->args[5] == TCG_COND_GE)
206
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
207
if (op->args[5] == TCG_COND_EQ) {
208
/* Simplify EQ comparisons where one of the pairs
209
can be simplified. */
210
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
211
- op->args[1], op->args[3],
212
- TCG_COND_EQ);
213
- if (tmp == 0) {
214
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
215
+ op->args[1], op->args[3],
216
+ TCG_COND_EQ);
217
+ if (i == 0) {
218
goto do_setcond_const;
219
- } else if (tmp == 1) {
220
+ } else if (i > 0) {
221
goto do_setcond_high;
222
}
223
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
224
- op->args[2], op->args[4],
225
- TCG_COND_EQ);
226
- if (tmp == 0) {
227
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
228
+ op->args[2], op->args[4],
229
+ TCG_COND_EQ);
230
+ if (i == 0) {
231
goto do_setcond_high;
232
- } else if (tmp != 1) {
233
+ } else if (i < 0) {
234
break;
235
}
236
do_setcond_low:
237
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
238
if (op->args[5] == TCG_COND_NE) {
239
/* Simplify NE comparisons where one of the pairs
240
can be simplified. */
241
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
242
- op->args[1], op->args[3],
243
- TCG_COND_NE);
244
- if (tmp == 0) {
245
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
246
+ op->args[1], op->args[3],
247
+ TCG_COND_NE);
248
+ if (i == 0) {
249
goto do_setcond_high;
250
- } else if (tmp == 1) {
251
+ } else if (i > 0) {
252
goto do_setcond_const;
253
}
254
- tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
255
- op->args[2], op->args[4],
256
- TCG_COND_NE);
257
- if (tmp == 0) {
258
+ i = do_constant_folding_cond(INDEX_op_setcond_i32,
259
+ op->args[2], op->args[4],
260
+ TCG_COND_NE);
261
+ if (i == 0) {
262
goto do_setcond_low;
263
- } else if (tmp == 1) {
264
+ } else if (i > 0) {
265
goto do_setcond_const;
266
}
267
}
268
--
269
2.25.1
270
271
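The return-value convention described above, in a standalone form: the folding probe
returns 0 or 1 when both operands are known, and -1 when it cannot decide, so every
caller can simply compare against 0 instead of against a magic value 2:

    #include <stdio.h>

    /* Fold an equality test when both operands are known constants;
     * return -1 for "cannot simplify". */
    static int fold_cond_eq(int x_known, long x, int y_known, long y)
    {
        if (x_known && y_known) {
            return x == y;          /* 0 or 1 */
        }
        return -1;                  /* leave the comparison for runtime */
    }

    int main(void)
    {
        int i = fold_cond_eq(1, 5, 1, 5);
        if (i >= 0) {
            printf("folded to constant %d\n", i);   /* taken: prints 1 */
        } else {
            printf("not folded\n");
        }
        return 0;
    }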
1
Pull the "op r, a, a => mov r, a" optimization into a function,
1
Instead of trying to unify all operations on uint64_t, pull out
2
and use it in the outer opcode fold functions.
2
mmu_lookup() to perform the basic tlb hit and resolution.
3
Create individual functions to handle access by size.
3
4
4
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
5
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
8
---
8
tcg/optimize.c | 39 ++++++++++++++++++++++++---------------
9
accel/tcg/cputlb.c | 645 +++++++++++++++++++++++++++++----------------
9
1 file changed, 24 insertions(+), 15 deletions(-)
10
1 file changed, 424 insertions(+), 221 deletions(-)
10
11
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
14
--- a/accel/tcg/cputlb.c
14
+++ b/tcg/optimize.c
15
+++ b/accel/tcg/cputlb.c
15
@@ -XXX,XX +XXX,XX @@ static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
16
@@ -XXX,XX +XXX,XX @@ bool tlb_plugin_lookup(CPUState *cpu, target_ulong addr, int mmu_idx,
16
return false;
17
17
}
18
#endif
18
19
19
+/* If the binary operation has both arguments equal, fold to identity. */
20
+/*
20
+static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
21
+ * Probe for a load/store operation.
21
+{
22
+ * Return the host address and into @flags.
22
+ if (args_are_copies(op->args[1], op->args[2])) {
23
+ */
23
+ return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
24
+
24
+ }
25
+typedef struct MMULookupPageData {
25
+ return false;
26
+ CPUTLBEntryFull *full;
27
+ void *haddr;
28
+ target_ulong addr;
29
+ int flags;
30
+ int size;
31
+} MMULookupPageData;
32
+
33
+typedef struct MMULookupLocals {
34
+ MMULookupPageData page[2];
35
+ MemOp memop;
36
+ int mmu_idx;
37
+} MMULookupLocals;
38
+
39
+/**
40
+ * mmu_lookup1: translate one page
41
+ * @env: cpu context
42
+ * @data: lookup parameters
43
+ * @mmu_idx: virtual address context
44
+ * @access_type: load/store/code
45
+ * @ra: return address into tcg generated code, or 0
46
+ *
47
+ * Resolve the translation for the one page at @data.addr, filling in
48
+ * the rest of @data with the results. If the translation fails,
49
+ * tlb_fill will longjmp out. Return true if the softmmu tlb for
50
+ * @mmu_idx may have resized.
51
+ */
52
+static bool mmu_lookup1(CPUArchState *env, MMULookupPageData *data,
53
+ int mmu_idx, MMUAccessType access_type, uintptr_t ra)
54
+{
55
+ target_ulong addr = data->addr;
56
+ uintptr_t index = tlb_index(env, mmu_idx, addr);
57
+ CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
58
+ target_ulong tlb_addr = tlb_read_idx(entry, access_type);
59
+ bool maybe_resized = false;
60
+
61
+ /* If the TLB entry is for a different page, reload and try again. */
62
+ if (!tlb_hit(tlb_addr, addr)) {
63
+ if (!victim_tlb_hit(env, mmu_idx, index, access_type,
64
+ addr & TARGET_PAGE_MASK)) {
65
+ tlb_fill(env_cpu(env), addr, data->size, access_type, mmu_idx, ra);
66
+ maybe_resized = true;
67
+ index = tlb_index(env, mmu_idx, addr);
68
+ entry = tlb_entry(env, mmu_idx, addr);
69
+ }
70
+ tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK;
71
+ }
72
+
73
+ data->flags = tlb_addr & TLB_FLAGS_MASK;
74
+ data->full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
75
+ /* Compute haddr speculatively; depending on flags it might be invalid. */
76
+ data->haddr = (void *)((uintptr_t)addr + entry->addend);
77
+
78
+ return maybe_resized;
79
+}
80
+
81
+/**
82
+ * mmu_watch_or_dirty
83
+ * @env: cpu context
84
+ * @data: lookup parameters
85
+ * @access_type: load/store/code
86
+ * @ra: return address into tcg generated code, or 0
87
+ *
88
+ * Trigger watchpoints for @data.addr:@data.size;
89
+ * record writes to protected clean pages.
90
+ */
91
+static void mmu_watch_or_dirty(CPUArchState *env, MMULookupPageData *data,
92
+ MMUAccessType access_type, uintptr_t ra)
93
+{
94
+ CPUTLBEntryFull *full = data->full;
95
+ target_ulong addr = data->addr;
96
+ int flags = data->flags;
97
+ int size = data->size;
98
+
99
+ /* On watchpoint hit, this will longjmp out. */
100
+ if (flags & TLB_WATCHPOINT) {
101
+ int wp = access_type == MMU_DATA_STORE ? BP_MEM_WRITE : BP_MEM_READ;
102
+ cpu_check_watchpoint(env_cpu(env), addr, size, full->attrs, wp, ra);
103
+ flags &= ~TLB_WATCHPOINT;
104
+ }
105
+
106
+ /* Note that notdirty is only set for writes. */
107
+ if (flags & TLB_NOTDIRTY) {
108
+ notdirty_write(env_cpu(env), addr, size, full, ra);
109
+ flags &= ~TLB_NOTDIRTY;
110
+ }
111
+ data->flags = flags;
112
+}
113
+
114
+/**
115
+ * mmu_lookup: translate page(s)
116
+ * @env: cpu context
117
+ * @addr: virtual address
118
+ * @oi: combined mmu_idx and MemOp
119
+ * @ra: return address into tcg generated code, or 0
120
+ * @access_type: load/store/code
121
+ * @l: output result
122
+ *
123
+ * Resolve the translation for the page(s) beginning at @addr, for MemOp.size
124
+ * bytes. Return true if the lookup crosses a page boundary.
125
+ */
126
+static bool mmu_lookup(CPUArchState *env, target_ulong addr, MemOpIdx oi,
127
+ uintptr_t ra, MMUAccessType type, MMULookupLocals *l)
128
+{
129
+ unsigned a_bits;
130
+ bool crosspage;
131
+ int flags;
132
+
133
+ l->memop = get_memop(oi);
134
+ l->mmu_idx = get_mmuidx(oi);
135
+
136
+ tcg_debug_assert(l->mmu_idx < NB_MMU_MODES);
137
+
138
+ /* Handle CPU specific unaligned behaviour */
139
+ a_bits = get_alignment_bits(l->memop);
140
+ if (addr & ((1 << a_bits) - 1)) {
141
+ cpu_unaligned_access(env_cpu(env), addr, type, l->mmu_idx, ra);
142
+ }
143
+
144
+ l->page[0].addr = addr;
145
+ l->page[0].size = memop_size(l->memop);
146
+ l->page[1].addr = (addr + l->page[0].size - 1) & TARGET_PAGE_MASK;
147
+ l->page[1].size = 0;
148
+ crosspage = (addr ^ l->page[1].addr) & TARGET_PAGE_MASK;
149
+
150
+ if (likely(!crosspage)) {
151
+ mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra);
152
+
153
+ flags = l->page[0].flags;
154
+ if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
155
+ mmu_watch_or_dirty(env, &l->page[0], type, ra);
156
+ }
157
+ if (unlikely(flags & TLB_BSWAP)) {
158
+ l->memop ^= MO_BSWAP;
159
+ }
160
+ } else {
161
+ /* Finish compute of page crossing. */
162
+ int size0 = l->page[1].addr - addr;
163
+ l->page[1].size = l->page[0].size - size0;
164
+ l->page[0].size = size0;
165
+
166
+ /*
167
+ * Lookup both pages, recognizing exceptions from either. If the
168
+ * second lookup potentially resized, refresh first CPUTLBEntryFull.
169
+ */
170
+ mmu_lookup1(env, &l->page[0], l->mmu_idx, type, ra);
171
+ if (mmu_lookup1(env, &l->page[1], l->mmu_idx, type, ra)) {
172
+ uintptr_t index = tlb_index(env, l->mmu_idx, addr);
173
+ l->page[0].full = &env_tlb(env)->d[l->mmu_idx].fulltlb[index];
174
+ }
175
+
176
+ flags = l->page[0].flags | l->page[1].flags;
177
+ if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
178
+ mmu_watch_or_dirty(env, &l->page[0], type, ra);
179
+ mmu_watch_or_dirty(env, &l->page[1], type, ra);
180
+ }
181
+
182
+ /*
183
+ * Since target/sparc is the only user of TLB_BSWAP, and all
184
+ * Sparc accesses are aligned, any treatment across two pages
185
+ * would be arbitrary. Refuse it until there's a use.
186
+ */
187
+ tcg_debug_assert((flags & TLB_BSWAP) == 0);
188
+ }
189
+
190
+ return crosspage;
26
+}
191
+}
27
+
192
+
28
/*
193
/*
29
* These outermost fold_<op> functions are sorted alphabetically.
194
* Probe for an atomic operation. Do not allow unaligned operations,
195
* or io operations to proceed. Return the host address.
196
@@ -XXX,XX +XXX,XX @@ load_memop(const void *haddr, MemOp op)
197
}
198
}
199
200
-static inline uint64_t QEMU_ALWAYS_INLINE
201
-load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
202
- uintptr_t retaddr, MemOp op, MMUAccessType access_type,
203
- FullLoadHelper *full_load)
204
-{
205
- const unsigned a_bits = get_alignment_bits(get_memop(oi));
206
- const size_t size = memop_size(op);
207
- uintptr_t mmu_idx = get_mmuidx(oi);
208
- uintptr_t index;
209
- CPUTLBEntry *entry;
210
- target_ulong tlb_addr;
211
- void *haddr;
212
- uint64_t res;
213
-
214
- tcg_debug_assert(mmu_idx < NB_MMU_MODES);
215
-
216
- /* Handle CPU specific unaligned behaviour */
217
- if (addr & ((1 << a_bits) - 1)) {
218
- cpu_unaligned_access(env_cpu(env), addr, access_type,
219
- mmu_idx, retaddr);
220
- }
221
-
222
- index = tlb_index(env, mmu_idx, addr);
223
- entry = tlb_entry(env, mmu_idx, addr);
224
- tlb_addr = tlb_read_idx(entry, access_type);
225
-
226
- /* If the TLB entry is for a different page, reload and try again. */
227
- if (!tlb_hit(tlb_addr, addr)) {
228
- if (!victim_tlb_hit(env, mmu_idx, index, access_type,
229
- addr & TARGET_PAGE_MASK)) {
230
- tlb_fill(env_cpu(env), addr, size,
231
- access_type, mmu_idx, retaddr);
232
- index = tlb_index(env, mmu_idx, addr);
233
- entry = tlb_entry(env, mmu_idx, addr);
234
- }
235
- tlb_addr = tlb_read_idx(entry, access_type);
236
- tlb_addr &= ~TLB_INVALID_MASK;
237
- }
238
-
239
- /* Handle anything that isn't just a straight memory access. */
240
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
241
- CPUTLBEntryFull *full;
242
- bool need_swap;
243
-
244
- /* For anything that is unaligned, recurse through full_load. */
245
- if ((addr & (size - 1)) != 0) {
246
- goto do_unaligned_access;
247
- }
248
-
249
- full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
250
-
251
- /* Handle watchpoints. */
252
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
253
- /* On watchpoint hit, this will longjmp out. */
254
- cpu_check_watchpoint(env_cpu(env), addr, size,
255
- full->attrs, BP_MEM_READ, retaddr);
256
- }
257
-
258
- need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
259
-
260
- /* Handle I/O access. */
261
- if (likely(tlb_addr & TLB_MMIO)) {
262
- return io_readx(env, full, mmu_idx, addr, retaddr,
263
- access_type, op ^ (need_swap * MO_BSWAP));
264
- }
265
-
266
- haddr = (void *)((uintptr_t)addr + entry->addend);
267
-
268
- /*
269
- * Keep these two load_memop separate to ensure that the compiler
270
- * is able to fold the entire function to a single instruction.
271
- * There is a build-time assert inside to remind you of this. ;-)
272
- */
273
- if (unlikely(need_swap)) {
274
- return load_memop(haddr, op ^ MO_BSWAP);
275
- }
276
- return load_memop(haddr, op);
277
- }
278
-
279
- /* Handle slow unaligned access (it spans two pages or IO). */
280
- if (size > 1
281
- && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
282
- >= TARGET_PAGE_SIZE)) {
283
- target_ulong addr1, addr2;
284
- uint64_t r1, r2;
285
- unsigned shift;
286
- do_unaligned_access:
287
- addr1 = addr & ~((target_ulong)size - 1);
288
- addr2 = addr1 + size;
289
- r1 = full_load(env, addr1, oi, retaddr);
290
- r2 = full_load(env, addr2, oi, retaddr);
291
- shift = (addr & (size - 1)) * 8;
292
-
293
- if (memop_big_endian(op)) {
294
- /* Big-endian combine. */
295
- res = (r1 << shift) | (r2 >> ((size * 8) - shift));
296
- } else {
297
- /* Little-endian combine. */
298
- res = (r1 >> shift) | (r2 << ((size * 8) - shift));
299
- }
300
- return res & MAKE_64BIT_MASK(0, size * 8);
301
- }
302
-
303
- haddr = (void *)((uintptr_t)addr + entry->addend);
304
- return load_memop(haddr, op);
305
-}
306
-
307
/*
308
* For the benefit of TCG generated code, we want to avoid the
309
* complication of ABI-specific return type promotion and always
310
@@ -XXX,XX +XXX,XX @@ load_helper(CPUArchState *env, target_ulong addr, MemOpIdx oi,
311
* We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
312
*/
313
314
-static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
315
- MemOpIdx oi, uintptr_t retaddr)
316
+/**
317
+ * do_ld_mmio_beN:
318
+ * @env: cpu context
319
+ * @p: translation parameters
320
+ * @ret_be: accumulated data
321
+ * @mmu_idx: virtual address context
322
+ * @ra: return address into tcg generated code, or 0
30
+ *
323
+ *
31
+ * The ordering of the transformations should be:
324
+ * Load @p->size bytes from @p->addr, which is memory-mapped i/o.
32
+ * 1) those that produce a constant
325
+ * The bytes are concatenated in big-endian order with @ret_be.
33
+ * 2) those that produce a copy
326
+ */
34
+ * 3) those that produce information about the result value.
327
+static uint64_t do_ld_mmio_beN(CPUArchState *env, MMULookupPageData *p,
328
+ uint64_t ret_be, int mmu_idx,
329
+ MMUAccessType type, uintptr_t ra)
330
{
331
- validate_memop(oi, MO_UB);
332
- return load_helper(env, addr, oi, retaddr, MO_UB, MMU_DATA_LOAD,
333
- full_ldub_mmu);
334
+ CPUTLBEntryFull *full = p->full;
335
+ target_ulong addr = p->addr;
336
+ int i, size = p->size;
337
+
338
+ QEMU_IOTHREAD_LOCK_GUARD();
339
+ for (i = 0; i < size; i++) {
340
+ uint8_t x = io_readx(env, full, mmu_idx, addr + i, ra, type, MO_UB);
341
+ ret_be = (ret_be << 8) | x;
342
+ }
343
+ return ret_be;
344
+}
345
+
346
+/**
347
+ * do_ld_bytes_beN
348
+ * @p: translation parameters
349
+ * @ret_be: accumulated data
350
+ *
351
+ * Load @p->size bytes from @p->haddr, which is RAM.
352
+ * The bytes to concatenated in big-endian order with @ret_be.
353
+ */
354
+static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
355
+{
356
+ uint8_t *haddr = p->haddr;
357
+ int i, size = p->size;
358
+
359
+ for (i = 0; i < size; i++) {
360
+ ret_be = (ret_be << 8) | haddr[i];
361
+ }
362
+ return ret_be;
363
+}
364
+
365
+/*
366
+ * Wrapper for the above.
367
+ */
368
+static uint64_t do_ld_beN(CPUArchState *env, MMULookupPageData *p,
369
+ uint64_t ret_be, int mmu_idx,
370
+ MMUAccessType type, uintptr_t ra)
371
+{
372
+ if (unlikely(p->flags & TLB_MMIO)) {
373
+ return do_ld_mmio_beN(env, p, ret_be, mmu_idx, type, ra);
374
+ } else {
375
+ return do_ld_bytes_beN(p, ret_be);
376
+ }
377
+}
378
+
379
+static uint8_t do_ld_1(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
380
+ MMUAccessType type, uintptr_t ra)
381
+{
382
+ if (unlikely(p->flags & TLB_MMIO)) {
383
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, MO_UB);
384
+ } else {
385
+ return *(uint8_t *)p->haddr;
386
+ }
387
+}
388
+
389
+static uint16_t do_ld_2(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
390
+ MMUAccessType type, MemOp memop, uintptr_t ra)
391
+{
392
+ uint64_t ret;
393
+
394
+ if (unlikely(p->flags & TLB_MMIO)) {
395
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
396
+ }
397
+
398
+ /* Perform the load host endian, then swap if necessary. */
399
+ ret = load_memop(p->haddr, MO_UW);
400
+ if (memop & MO_BSWAP) {
401
+ ret = bswap16(ret);
402
+ }
403
+ return ret;
404
+}
405
+
406
+static uint32_t do_ld_4(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
407
+ MMUAccessType type, MemOp memop, uintptr_t ra)
408
+{
409
+ uint32_t ret;
410
+
411
+ if (unlikely(p->flags & TLB_MMIO)) {
412
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
413
+ }
414
+
415
+ /* Perform the load host endian. */
416
+ ret = load_memop(p->haddr, MO_UL);
417
+ if (memop & MO_BSWAP) {
418
+ ret = bswap32(ret);
419
+ }
420
+ return ret;
421
+}
422
+
423
+static uint64_t do_ld_8(CPUArchState *env, MMULookupPageData *p, int mmu_idx,
424
+ MMUAccessType type, MemOp memop, uintptr_t ra)
425
+{
426
+ uint64_t ret;
427
+
428
+ if (unlikely(p->flags & TLB_MMIO)) {
429
+ return io_readx(env, p->full, mmu_idx, p->addr, ra, type, memop);
430
+ }
431
+
432
+ /* Perform the load host endian. */
433
+ ret = load_memop(p->haddr, MO_UQ);
434
+ if (memop & MO_BSWAP) {
435
+ ret = bswap64(ret);
436
+ }
437
+ return ret;
438
+}
439
+
440
+static uint8_t do_ld1_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
441
+ uintptr_t ra, MMUAccessType access_type)
442
+{
443
+ MMULookupLocals l;
444
+ bool crosspage;
445
+
446
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
447
+ tcg_debug_assert(!crosspage);
448
+
449
+ return do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
450
}
451
452
tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
453
MemOpIdx oi, uintptr_t retaddr)
454
{
455
- return full_ldub_mmu(env, addr, oi, retaddr);
456
+ validate_memop(oi, MO_UB);
457
+ return do_ld1_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
458
}
459
460
-static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
461
- MemOpIdx oi, uintptr_t retaddr)
462
+static uint16_t do_ld2_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
463
+ uintptr_t ra, MMUAccessType access_type)
464
{
465
- validate_memop(oi, MO_LEUW);
466
- return load_helper(env, addr, oi, retaddr, MO_LEUW, MMU_DATA_LOAD,
467
- full_le_lduw_mmu);
468
+ MMULookupLocals l;
469
+ bool crosspage;
470
+ uint16_t ret;
471
+ uint8_t a, b;
472
+
473
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
474
+ if (likely(!crosspage)) {
475
+ return do_ld_2(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
476
+ }
477
+
478
+ a = do_ld_1(env, &l.page[0], l.mmu_idx, access_type, ra);
479
+ b = do_ld_1(env, &l.page[1], l.mmu_idx, access_type, ra);
480
+
481
+ if ((l.memop & MO_BSWAP) == MO_LE) {
482
+ ret = a | (b << 8);
483
+ } else {
484
+ ret = b | (a << 8);
485
+ }
486
+ return ret;
487
}
488
489
tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
490
MemOpIdx oi, uintptr_t retaddr)
491
{
492
- return full_le_lduw_mmu(env, addr, oi, retaddr);
493
-}
494
-
495
-static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
496
- MemOpIdx oi, uintptr_t retaddr)
497
-{
498
- validate_memop(oi, MO_BEUW);
499
- return load_helper(env, addr, oi, retaddr, MO_BEUW, MMU_DATA_LOAD,
500
- full_be_lduw_mmu);
501
+ validate_memop(oi, MO_LEUW);
502
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
503
}
504
505
tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
506
MemOpIdx oi, uintptr_t retaddr)
507
{
508
- return full_be_lduw_mmu(env, addr, oi, retaddr);
509
+ validate_memop(oi, MO_BEUW);
510
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
511
}
512
513
-static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
514
- MemOpIdx oi, uintptr_t retaddr)
515
+static uint32_t do_ld4_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
516
+ uintptr_t ra, MMUAccessType access_type)
517
{
518
- validate_memop(oi, MO_LEUL);
519
- return load_helper(env, addr, oi, retaddr, MO_LEUL, MMU_DATA_LOAD,
520
- full_le_ldul_mmu);
521
+ MMULookupLocals l;
522
+ bool crosspage;
523
+ uint32_t ret;
524
+
525
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
526
+ if (likely(!crosspage)) {
527
+ return do_ld_4(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
528
+ }
529
+
530
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
531
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
532
+ if ((l.memop & MO_BSWAP) == MO_LE) {
533
+ ret = bswap32(ret);
534
+ }
535
+ return ret;
536
}
537
538
tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
539
MemOpIdx oi, uintptr_t retaddr)
540
{
541
- return full_le_ldul_mmu(env, addr, oi, retaddr);
542
-}
543
-
544
-static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
545
- MemOpIdx oi, uintptr_t retaddr)
546
-{
547
- validate_memop(oi, MO_BEUL);
548
- return load_helper(env, addr, oi, retaddr, MO_BEUL, MMU_DATA_LOAD,
549
- full_be_ldul_mmu);
550
+ validate_memop(oi, MO_LEUL);
551
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
552
}
553
554
tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
555
MemOpIdx oi, uintptr_t retaddr)
556
{
557
- return full_be_ldul_mmu(env, addr, oi, retaddr);
558
+ validate_memop(oi, MO_BEUL);
559
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
560
+}
561
+
562
+static uint64_t do_ld8_mmu(CPUArchState *env, target_ulong addr, MemOpIdx oi,
563
+ uintptr_t ra, MMUAccessType access_type)
564
+{
565
+ MMULookupLocals l;
566
+ bool crosspage;
567
+ uint64_t ret;
568
+
569
+ crosspage = mmu_lookup(env, addr, oi, ra, access_type, &l);
570
+ if (likely(!crosspage)) {
571
+ return do_ld_8(env, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
572
+ }
573
+
574
+ ret = do_ld_beN(env, &l.page[0], 0, l.mmu_idx, access_type, ra);
575
+ ret = do_ld_beN(env, &l.page[1], ret, l.mmu_idx, access_type, ra);
576
+ if ((l.memop & MO_BSWAP) == MO_LE) {
577
+ ret = bswap64(ret);
578
+ }
579
+ return ret;
580
}
581
582
uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
583
MemOpIdx oi, uintptr_t retaddr)
584
{
585
validate_memop(oi, MO_LEUQ);
586
- return load_helper(env, addr, oi, retaddr, MO_LEUQ, MMU_DATA_LOAD,
587
- helper_le_ldq_mmu);
588
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
589
}
590
591
uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
592
MemOpIdx oi, uintptr_t retaddr)
593
{
594
validate_memop(oi, MO_BEUQ);
595
- return load_helper(env, addr, oi, retaddr, MO_BEUQ, MMU_DATA_LOAD,
596
- helper_be_ldq_mmu);
597
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_DATA_LOAD);
598
}
599
600
/*
601
@@ -XXX,XX +XXX,XX @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
602
* Load helpers for cpu_ldst.h.
35
*/
603
*/
36
604
37
static bool fold_add(OptContext *ctx, TCGOp *op)
605
-static inline uint64_t cpu_load_helper(CPUArchState *env, abi_ptr addr,
38
@@ -XXX,XX +XXX,XX @@ static bool fold_add2_i32(OptContext *ctx, TCGOp *op)
606
- MemOpIdx oi, uintptr_t retaddr,
39
607
- FullLoadHelper *full_load)
40
static bool fold_and(OptContext *ctx, TCGOp *op)
608
+static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
41
{
609
{
42
- return fold_const2(ctx, op);
610
- uint64_t ret;
43
+ if (fold_const2(ctx, op) ||
611
-
44
+ fold_xx_to_x(ctx, op)) {
612
- ret = full_load(env, addr, oi, retaddr);
45
+ return true;
613
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
46
+ }
614
- return ret;
47
+ return false;
615
}
48
}
616
49
617
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
50
static bool fold_andc(OptContext *ctx, TCGOp *op)
618
{
51
@@ -XXX,XX +XXX,XX @@ static bool fold_not(OptContext *ctx, TCGOp *op)
619
- return cpu_load_helper(env, addr, oi, ra, full_ldub_mmu);
52
620
+ uint8_t ret;
53
static bool fold_or(OptContext *ctx, TCGOp *op)
621
+
54
{
622
+ validate_memop(oi, MO_UB);
55
- return fold_const2(ctx, op);
623
+ ret = do_ld1_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
56
+ if (fold_const2(ctx, op) ||
624
+ plugin_load_cb(env, addr, oi);
57
+ fold_xx_to_x(ctx, op)) {
625
+ return ret;
58
+ return true;
626
}
59
+ }
627
60
+ return false;
628
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
61
}
629
MemOpIdx oi, uintptr_t ra)
62
630
{
63
static bool fold_orc(OptContext *ctx, TCGOp *op)
631
- return cpu_load_helper(env, addr, oi, ra, full_be_lduw_mmu);
64
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
632
+ uint16_t ret;
65
break;
633
+
66
}
634
+ validate_memop(oi, MO_BEUW);
67
635
+ ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
68
- /* Simplify expression for "op r, a, a => mov r, a" cases */
636
+ plugin_load_cb(env, addr, oi);
69
- switch (opc) {
637
+ return ret;
70
- CASE_OP_32_64_VEC(or):
638
}
71
- CASE_OP_32_64_VEC(and):
639
72
- if (args_are_copies(op->args[1], op->args[2])) {
640
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
73
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
641
MemOpIdx oi, uintptr_t ra)
74
- continue;
642
{
75
- }
643
- return cpu_load_helper(env, addr, oi, ra, full_be_ldul_mmu);
76
- break;
644
+ uint32_t ret;
77
- default:
645
+
78
- break;
646
+ validate_memop(oi, MO_BEUL);
79
- }
647
+ ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
80
-
648
+ plugin_load_cb(env, addr, oi);
81
/*
649
+ return ret;
82
* Process each opcode.
650
}
83
* Sorted alphabetically by opcode as much as possible.
651
652
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
653
MemOpIdx oi, uintptr_t ra)
654
{
655
- return cpu_load_helper(env, addr, oi, ra, helper_be_ldq_mmu);
656
+ uint64_t ret;
657
+
658
+ validate_memop(oi, MO_BEUQ);
659
+ ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
660
+ plugin_load_cb(env, addr, oi);
661
+ return ret;
662
}
663
664
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
665
MemOpIdx oi, uintptr_t ra)
666
{
667
- return cpu_load_helper(env, addr, oi, ra, full_le_lduw_mmu);
668
+ uint16_t ret;
669
+
670
+ validate_memop(oi, MO_LEUW);
671
+ ret = do_ld2_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
672
+ plugin_load_cb(env, addr, oi);
673
+ return ret;
674
}
675
676
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
677
MemOpIdx oi, uintptr_t ra)
678
{
679
- return cpu_load_helper(env, addr, oi, ra, full_le_ldul_mmu);
680
+ uint32_t ret;
681
+
682
+ validate_memop(oi, MO_LEUL);
683
+ ret = do_ld4_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
684
+ plugin_load_cb(env, addr, oi);
685
+ return ret;
686
}
687
688
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
689
MemOpIdx oi, uintptr_t ra)
690
{
691
- return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
692
+ uint64_t ret;
693
+
694
+ validate_memop(oi, MO_LEUQ);
695
+ ret = do_ld8_mmu(env, addr, oi, ra, MMU_DATA_LOAD);
696
+ plugin_load_cb(env, addr, oi);
697
+ return ret;
698
}
699
700
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
701
@@ -XXX,XX +XXX,XX @@ void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
702
703
/* Code access functions. */
704
705
-static uint64_t full_ldub_code(CPUArchState *env, target_ulong addr,
706
- MemOpIdx oi, uintptr_t retaddr)
707
-{
708
- return load_helper(env, addr, oi, retaddr, MO_8,
709
- MMU_INST_FETCH, full_ldub_code);
710
-}
711
-
712
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
713
{
714
MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
715
- return full_ldub_code(env, addr, oi, 0);
716
-}
717
-
718
-static uint64_t full_lduw_code(CPUArchState *env, target_ulong addr,
719
- MemOpIdx oi, uintptr_t retaddr)
720
-{
721
- return load_helper(env, addr, oi, retaddr, MO_TEUW,
722
- MMU_INST_FETCH, full_lduw_code);
723
+ return do_ld1_mmu(env, addr, oi, 0, MMU_INST_FETCH);
724
}
725
726
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
727
{
728
MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
729
- return full_lduw_code(env, addr, oi, 0);
730
-}
731
-
732
-static uint64_t full_ldl_code(CPUArchState *env, target_ulong addr,
733
- MemOpIdx oi, uintptr_t retaddr)
734
-{
735
- return load_helper(env, addr, oi, retaddr, MO_TEUL,
736
- MMU_INST_FETCH, full_ldl_code);
737
+ return do_ld2_mmu(env, addr, oi, 0, MMU_INST_FETCH);
738
}
739
740
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
741
{
742
MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
743
- return full_ldl_code(env, addr, oi, 0);
744
-}
745
-
746
-static uint64_t full_ldq_code(CPUArchState *env, target_ulong addr,
747
- MemOpIdx oi, uintptr_t retaddr)
748
-{
749
- return load_helper(env, addr, oi, retaddr, MO_TEUQ,
750
- MMU_INST_FETCH, full_ldq_code);
751
+ return do_ld4_mmu(env, addr, oi, 0, MMU_INST_FETCH);
752
}
753
754
uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
755
{
756
MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
757
- return full_ldq_code(env, addr, oi, 0);
758
+ return do_ld8_mmu(env, addr, oi, 0, MMU_INST_FETCH);
759
}
760
761
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
762
MemOpIdx oi, uintptr_t retaddr)
763
{
764
- return full_ldub_code(env, addr, oi, retaddr);
765
+ return do_ld1_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
766
}
767
768
uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
769
MemOpIdx oi, uintptr_t retaddr)
770
{
771
- MemOp mop = get_memop(oi);
772
- int idx = get_mmuidx(oi);
773
- uint16_t ret;
774
-
775
- ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
776
- if ((mop & MO_BSWAP) != MO_TE) {
777
- ret = bswap16(ret);
778
- }
779
- return ret;
780
+ return do_ld2_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
781
}
782
783
uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
784
MemOpIdx oi, uintptr_t retaddr)
785
{
786
- MemOp mop = get_memop(oi);
787
- int idx = get_mmuidx(oi);
788
- uint32_t ret;
789
-
790
- ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
791
- if ((mop & MO_BSWAP) != MO_TE) {
792
- ret = bswap32(ret);
793
- }
794
- return ret;
795
+ return do_ld4_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
796
}
797
798
uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
799
MemOpIdx oi, uintptr_t retaddr)
800
{
801
- MemOp mop = get_memop(oi);
802
- int idx = get_mmuidx(oi);
803
- uint64_t ret;
804
-
805
- ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
806
- if ((mop & MO_BSWAP) != MO_TE) {
807
- ret = bswap64(ret);
808
- }
809
- return ret;
810
+ return do_ld8_mmu(env, addr, oi, retaddr, MMU_INST_FETCH);
811
}
84
--
812
--
85
2.25.1
813
2.34.1
86
814
87
815
diff view generated by jsdifflib
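A rough standalone sketch of the big-endian byte accumulation used by do_ld_bytes_beN() and do_ld_beN() above: a cross-page access is rebuilt from two per-page calls, and do_ld4_mmu()/do_ld8_mmu() then bswap the result if the memop is little-endian. The buffer names and the main() harness below are invented for illustration and are not part of the patch.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same shape as do_ld_bytes_beN(): shift prior bytes up, append new ones. */
    static uint64_t acc_bytes_be(const uint8_t *haddr, int size, uint64_t ret_be)
    {
        for (int i = 0; i < size; i++) {
            ret_be = (ret_be << 8) | haddr[i];
        }
        return ret_be;
    }

    int main(void)
    {
        /* A 4-byte big-endian value 0x11223344 split 3+1 across a page boundary. */
        uint8_t page0[] = { 0x11, 0x22, 0x33 };
        uint8_t page1[] = { 0x44 };
        uint64_t ret = acc_bytes_be(page0, 3, 0);    /* bytes from the first page */
        ret = acc_bytes_be(page1, 1, ret);           /* bytes from the second page */
        printf("0x%08" PRIx64 "\n", ret);            /* prints 0x11223344 */
        return 0;
    }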
1
Copy z_mask into OptContext, for writeback to the
1
Instead of trying to unify all operations on uint64_t, use
2
first output within the new function.
2
mmu_lookup() to perform the basic tlb hit and resolution.
3
Create individual functions to handle access by size.
3
4
4
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
7
---
7
---
8
tcg/optimize.c | 49 +++++++++++++++++++++++++++++++++----------------
8
accel/tcg/cputlb.c | 408 +++++++++++++++++++++------------------------
9
1 file changed, 33 insertions(+), 16 deletions(-)
9
1 file changed, 193 insertions(+), 215 deletions(-)
10
10
11
diff --git a/tcg/optimize.c b/tcg/optimize.c
11
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
12
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
13
--- a/tcg/optimize.c
13
--- a/accel/tcg/cputlb.c
14
+++ b/tcg/optimize.c
14
+++ b/accel/tcg/cputlb.c
15
@@ -XXX,XX +XXX,XX @@ typedef struct OptContext {
15
@@ -XXX,XX +XXX,XX @@ store_memop(void *haddr, uint64_t val, MemOp op)
16
TCGContext *tcg;
17
TCGOp *prev_mb;
18
TCGTempSet temps_used;
19
+
20
+ /* In flight values from optimization. */
21
+ uint64_t z_mask;
22
} OptContext;
23
24
static inline TempOptInfo *ts_info(TCGTemp *ts)
25
@@ -XXX,XX +XXX,XX @@ static void copy_propagate(OptContext *ctx, TCGOp *op,
26
}
16
}
27
}
17
}
28
18
29
+static void finish_folding(OptContext *ctx, TCGOp *op)
19
-static void full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
20
- MemOpIdx oi, uintptr_t retaddr);
21
-
22
-static void __attribute__((noinline))
23
-store_helper_unaligned(CPUArchState *env, target_ulong addr, uint64_t val,
24
- uintptr_t retaddr, size_t size, uintptr_t mmu_idx,
25
- bool big_endian)
26
+/**
27
+ * do_st_mmio_leN:
28
+ * @env: cpu context
29
+ * @p: translation parameters
30
+ * @val_le: data to store
31
+ * @mmu_idx: virtual address context
32
+ * @ra: return address into tcg generated code, or 0
33
+ *
34
+ * Store @p->size bytes at @p->addr, which is memory-mapped i/o.
35
+ * The bytes to store are extracted in little-endian order from @val_le;
36
+ * return the bytes of @val_le beyond @p->size that have not been stored.
37
+ */
38
+static uint64_t do_st_mmio_leN(CPUArchState *env, MMULookupPageData *p,
39
+ uint64_t val_le, int mmu_idx, uintptr_t ra)
40
{
41
- uintptr_t index, index2;
42
- CPUTLBEntry *entry, *entry2;
43
- target_ulong page1, page2, tlb_addr, tlb_addr2;
44
- MemOpIdx oi;
45
- size_t size2;
46
- int i;
47
+ CPUTLBEntryFull *full = p->full;
48
+ target_ulong addr = p->addr;
49
+ int i, size = p->size;
50
51
- /*
52
- * Ensure the second page is in the TLB. Note that the first page
53
- * is already guaranteed to be filled, and that the second page
54
- * cannot evict the first. An exception to this rule is PAGE_WRITE_INV
55
- * handling: the first page could have evicted itself.
56
- */
57
- page1 = addr & TARGET_PAGE_MASK;
58
- page2 = (addr + size) & TARGET_PAGE_MASK;
59
- size2 = (addr + size) & ~TARGET_PAGE_MASK;
60
- index2 = tlb_index(env, mmu_idx, page2);
61
- entry2 = tlb_entry(env, mmu_idx, page2);
62
-
63
- tlb_addr2 = tlb_addr_write(entry2);
64
- if (page1 != page2 && !tlb_hit_page(tlb_addr2, page2)) {
65
- if (!victim_tlb_hit(env, mmu_idx, index2, MMU_DATA_STORE, page2)) {
66
- tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
67
- mmu_idx, retaddr);
68
- index2 = tlb_index(env, mmu_idx, page2);
69
- entry2 = tlb_entry(env, mmu_idx, page2);
70
- }
71
- tlb_addr2 = tlb_addr_write(entry2);
72
+ QEMU_IOTHREAD_LOCK_GUARD();
73
+ for (i = 0; i < size; i++, val_le >>= 8) {
74
+ io_writex(env, full, mmu_idx, val_le, addr + i, ra, MO_UB);
75
}
76
+ return val_le;
77
+}
78
79
- index = tlb_index(env, mmu_idx, addr);
80
- entry = tlb_entry(env, mmu_idx, addr);
81
- tlb_addr = tlb_addr_write(entry);
82
+/**
83
+ * do_st_bytes_leN:
84
+ * @p: translation parameters
85
+ * @val_le: data to store
86
+ *
87
+ * Store @p->size bytes at @p->haddr, which is RAM.
88
+ * The bytes to store are extracted in little-endian order from @val_le;
89
+ * return the bytes of @val_le beyond @p->size that have not been stored.
90
+ */
91
+static uint64_t do_st_bytes_leN(MMULookupPageData *p, uint64_t val_le)
30
+{
92
+{
31
+ const TCGOpDef *def = &tcg_op_defs[op->opc];
93
+ uint8_t *haddr = p->haddr;
32
+ int i, nb_oargs;
94
+ int i, size = p->size;
33
+
95
34
+ /*
96
- /*
35
+ * For an opcode that ends a BB, reset all temp data.
97
- * Handle watchpoints. Since this may trap, all checks
36
+ * We do no cross-BB optimization.
98
- * must happen before any store.
37
+ */
99
- */
38
+ if (def->flags & TCG_OPF_BB_END) {
100
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
39
+ memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
101
- cpu_check_watchpoint(env_cpu(env), addr, size - size2,
40
+ ctx->prev_mb = NULL;
102
- env_tlb(env)->d[mmu_idx].fulltlb[index].attrs,
103
- BP_MEM_WRITE, retaddr);
104
- }
105
- if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
106
- cpu_check_watchpoint(env_cpu(env), page2, size2,
107
- env_tlb(env)->d[mmu_idx].fulltlb[index2].attrs,
108
- BP_MEM_WRITE, retaddr);
109
+ for (i = 0; i < size; i++, val_le >>= 8) {
110
+ haddr[i] = val_le;
111
}
112
+ return val_le;
113
+}
114
115
- /*
116
- * XXX: not efficient, but simple.
117
- * This loop must go in the forward direction to avoid issues
118
- * with self-modifying code in Windows 64-bit.
119
- */
120
- oi = make_memop_idx(MO_UB, mmu_idx);
121
- if (big_endian) {
122
- for (i = 0; i < size; ++i) {
123
- /* Big-endian extract. */
124
- uint8_t val8 = val >> (((size - 1) * 8) - (i * 8));
125
- full_stb_mmu(env, addr + i, val8, oi, retaddr);
126
- }
127
+/*
128
+ * Wrapper for the above.
129
+ */
130
+static uint64_t do_st_leN(CPUArchState *env, MMULookupPageData *p,
131
+ uint64_t val_le, int mmu_idx, uintptr_t ra)
132
+{
133
+ if (unlikely(p->flags & TLB_MMIO)) {
134
+ return do_st_mmio_leN(env, p, val_le, mmu_idx, ra);
135
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
136
+ return val_le >> (p->size * 8);
137
} else {
138
- for (i = 0; i < size; ++i) {
139
- /* Little-endian extract. */
140
- uint8_t val8 = val >> (i * 8);
141
- full_stb_mmu(env, addr + i, val8, oi, retaddr);
142
- }
143
+ return do_st_bytes_leN(p, val_le);
144
}
145
}
146
147
-static inline void QEMU_ALWAYS_INLINE
148
-store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
149
- MemOpIdx oi, uintptr_t retaddr, MemOp op)
150
+static void do_st_1(CPUArchState *env, MMULookupPageData *p, uint8_t val,
151
+ int mmu_idx, uintptr_t ra)
152
{
153
- const unsigned a_bits = get_alignment_bits(get_memop(oi));
154
- const size_t size = memop_size(op);
155
- uintptr_t mmu_idx = get_mmuidx(oi);
156
- uintptr_t index;
157
- CPUTLBEntry *entry;
158
- target_ulong tlb_addr;
159
- void *haddr;
160
-
161
- tcg_debug_assert(mmu_idx < NB_MMU_MODES);
162
-
163
- /* Handle CPU specific unaligned behaviour */
164
- if (addr & ((1 << a_bits) - 1)) {
165
- cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
166
- mmu_idx, retaddr);
167
+ if (unlikely(p->flags & TLB_MMIO)) {
168
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, MO_UB);
169
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
170
+ /* nothing */
171
+ } else {
172
+ *(uint8_t *)p->haddr = val;
173
}
174
-
175
- index = tlb_index(env, mmu_idx, addr);
176
- entry = tlb_entry(env, mmu_idx, addr);
177
- tlb_addr = tlb_addr_write(entry);
178
-
179
- /* If the TLB entry is for a different page, reload and try again. */
180
- if (!tlb_hit(tlb_addr, addr)) {
181
- if (!victim_tlb_hit(env, mmu_idx, index, MMU_DATA_STORE,
182
- addr & TARGET_PAGE_MASK)) {
183
- tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
184
- mmu_idx, retaddr);
185
- index = tlb_index(env, mmu_idx, addr);
186
- entry = tlb_entry(env, mmu_idx, addr);
187
- }
188
- tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
189
- }
190
-
191
- /* Handle anything that isn't just a straight memory access. */
192
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
193
- CPUTLBEntryFull *full;
194
- bool need_swap;
195
-
196
- /* For anything that is unaligned, recurse through byte stores. */
197
- if ((addr & (size - 1)) != 0) {
198
- goto do_unaligned_access;
199
- }
200
-
201
- full = &env_tlb(env)->d[mmu_idx].fulltlb[index];
202
-
203
- /* Handle watchpoints. */
204
- if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
205
- /* On watchpoint hit, this will longjmp out. */
206
- cpu_check_watchpoint(env_cpu(env), addr, size,
207
- full->attrs, BP_MEM_WRITE, retaddr);
208
- }
209
-
210
- need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
211
-
212
- /* Handle I/O access. */
213
- if (tlb_addr & TLB_MMIO) {
214
- io_writex(env, full, mmu_idx, val, addr, retaddr,
215
- op ^ (need_swap * MO_BSWAP));
216
- return;
217
- }
218
-
219
- /* Ignore writes to ROM. */
220
- if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
221
- return;
222
- }
223
-
224
- /* Handle clean RAM pages. */
225
- if (tlb_addr & TLB_NOTDIRTY) {
226
- notdirty_write(env_cpu(env), addr, size, full, retaddr);
227
- }
228
-
229
- haddr = (void *)((uintptr_t)addr + entry->addend);
230
-
231
- /*
232
- * Keep these two store_memop separate to ensure that the compiler
233
- * is able to fold the entire function to a single instruction.
234
- * There is a build-time assert inside to remind you of this. ;-)
235
- */
236
- if (unlikely(need_swap)) {
237
- store_memop(haddr, val, op ^ MO_BSWAP);
238
- } else {
239
- store_memop(haddr, val, op);
240
- }
241
- return;
242
- }
243
-
244
- /* Handle slow unaligned access (it spans two pages or IO). */
245
- if (size > 1
246
- && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
247
- >= TARGET_PAGE_SIZE)) {
248
- do_unaligned_access:
249
- store_helper_unaligned(env, addr, val, retaddr, size,
250
- mmu_idx, memop_big_endian(op));
251
- return;
252
- }
253
-
254
- haddr = (void *)((uintptr_t)addr + entry->addend);
255
- store_memop(haddr, val, op);
256
}
257
258
-static void __attribute__((noinline))
259
-full_stb_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
260
- MemOpIdx oi, uintptr_t retaddr)
261
+static void do_st_2(CPUArchState *env, MMULookupPageData *p, uint16_t val,
262
+ int mmu_idx, MemOp memop, uintptr_t ra)
263
{
264
- validate_memop(oi, MO_UB);
265
- store_helper(env, addr, val, oi, retaddr, MO_UB);
266
+ if (unlikely(p->flags & TLB_MMIO)) {
267
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
268
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
269
+ /* nothing */
270
+ } else {
271
+ /* Swap to host endian if necessary, then store. */
272
+ if (memop & MO_BSWAP) {
273
+ val = bswap16(val);
274
+ }
275
+ store_memop(p->haddr, val, MO_UW);
276
+ }
277
+}
278
+
279
+static void do_st_4(CPUArchState *env, MMULookupPageData *p, uint32_t val,
280
+ int mmu_idx, MemOp memop, uintptr_t ra)
281
+{
282
+ if (unlikely(p->flags & TLB_MMIO)) {
283
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
284
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
285
+ /* nothing */
286
+ } else {
287
+ /* Swap to host endian if necessary, then store. */
288
+ if (memop & MO_BSWAP) {
289
+ val = bswap32(val);
290
+ }
291
+ store_memop(p->haddr, val, MO_UL);
292
+ }
293
+}
294
+
295
+static void do_st_8(CPUArchState *env, MMULookupPageData *p, uint64_t val,
296
+ int mmu_idx, MemOp memop, uintptr_t ra)
297
+{
298
+ if (unlikely(p->flags & TLB_MMIO)) {
299
+ io_writex(env, p->full, mmu_idx, val, p->addr, ra, memop);
300
+ } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
301
+ /* nothing */
302
+ } else {
303
+ /* Swap to host endian if necessary, then store. */
304
+ if (memop & MO_BSWAP) {
305
+ val = bswap64(val);
306
+ }
307
+ store_memop(p->haddr, val, MO_UQ);
308
+ }
309
}
310
311
void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
312
- MemOpIdx oi, uintptr_t retaddr)
313
+ MemOpIdx oi, uintptr_t ra)
314
{
315
- full_stb_mmu(env, addr, val, oi, retaddr);
316
+ MMULookupLocals l;
317
+ bool crosspage;
318
+
319
+ validate_memop(oi, MO_UB);
320
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
321
+ tcg_debug_assert(!crosspage);
322
+
323
+ do_st_1(env, &l.page[0], val, l.mmu_idx, ra);
324
}
325
326
-static void full_le_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
327
- MemOpIdx oi, uintptr_t retaddr)
328
+static void do_st2_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
329
+ MemOpIdx oi, uintptr_t ra)
330
{
331
- validate_memop(oi, MO_LEUW);
332
- store_helper(env, addr, val, oi, retaddr, MO_LEUW);
333
+ MMULookupLocals l;
334
+ bool crosspage;
335
+ uint8_t a, b;
336
+
337
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
338
+ if (likely(!crosspage)) {
339
+ do_st_2(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
41
+ return;
340
+ return;
42
+ }
341
+ }
43
+
342
+
44
+ nb_oargs = def->nb_oargs;
343
+ if ((l.memop & MO_BSWAP) == MO_LE) {
45
+ for (i = 0; i < nb_oargs; i++) {
344
+ a = val, b = val >> 8;
46
+ reset_temp(op->args[i]);
345
+ } else {
47
+ /*
346
+ b = val, a = val >> 8;
48
+ * Save the corresponding known-zero bits mask for the
347
+ }
49
+ * first output argument (only one supported so far).
348
+ do_st_1(env, &l.page[0], a, l.mmu_idx, ra);
50
+ */
349
+ do_st_1(env, &l.page[1], b, l.mmu_idx, ra);
51
+ if (i == 0) {
350
}
52
+ arg_info(op->args[i])->z_mask = ctx->z_mask;
351
53
+ }
352
void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
54
+ }
353
MemOpIdx oi, uintptr_t retaddr)
354
{
355
- full_le_stw_mmu(env, addr, val, oi, retaddr);
356
-}
357
-
358
-static void full_be_stw_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
359
- MemOpIdx oi, uintptr_t retaddr)
360
-{
361
- validate_memop(oi, MO_BEUW);
362
- store_helper(env, addr, val, oi, retaddr, MO_BEUW);
363
+ validate_memop(oi, MO_LEUW);
364
+ do_st2_mmu(env, addr, val, oi, retaddr);
365
}
366
367
void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
368
MemOpIdx oi, uintptr_t retaddr)
369
{
370
- full_be_stw_mmu(env, addr, val, oi, retaddr);
371
+ validate_memop(oi, MO_BEUW);
372
+ do_st2_mmu(env, addr, val, oi, retaddr);
373
}
374
375
-static void full_le_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
376
- MemOpIdx oi, uintptr_t retaddr)
377
+static void do_st4_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
378
+ MemOpIdx oi, uintptr_t ra)
379
{
380
- validate_memop(oi, MO_LEUL);
381
- store_helper(env, addr, val, oi, retaddr, MO_LEUL);
382
+ MMULookupLocals l;
383
+ bool crosspage;
384
+
385
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
386
+ if (likely(!crosspage)) {
387
+ do_st_4(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
388
+ return;
389
+ }
390
+
391
+ /* Swap to little endian for simplicity, then store by bytes. */
392
+ if ((l.memop & MO_BSWAP) != MO_LE) {
393
+ val = bswap32(val);
394
+ }
395
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
396
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
397
}
398
399
void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
400
MemOpIdx oi, uintptr_t retaddr)
401
{
402
- full_le_stl_mmu(env, addr, val, oi, retaddr);
403
-}
404
-
405
-static void full_be_stl_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
406
- MemOpIdx oi, uintptr_t retaddr)
407
-{
408
- validate_memop(oi, MO_BEUL);
409
- store_helper(env, addr, val, oi, retaddr, MO_BEUL);
410
+ validate_memop(oi, MO_LEUL);
411
+ do_st4_mmu(env, addr, val, oi, retaddr);
412
}
413
414
void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
415
MemOpIdx oi, uintptr_t retaddr)
416
{
417
- full_be_stl_mmu(env, addr, val, oi, retaddr);
418
+ validate_memop(oi, MO_BEUL);
419
+ do_st4_mmu(env, addr, val, oi, retaddr);
55
+}
420
+}
56
+
421
+
57
static bool fold_call(OptContext *ctx, TCGOp *op)
422
+static void do_st8_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
58
{
423
+ MemOpIdx oi, uintptr_t ra)
59
TCGContext *s = ctx->tcg;
424
+{
60
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
425
+ MMULookupLocals l;
61
partmask &= 0xffffffffu;
426
+ bool crosspage;
62
affected &= 0xffffffffu;
427
+
63
}
428
+ crosspage = mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE, &l);
64
+ ctx.z_mask = z_mask;
429
+ if (likely(!crosspage)) {
65
430
+ do_st_8(env, &l.page[0], val, l.mmu_idx, l.memop, ra);
66
if (partmask == 0) {
431
+ return;
67
tcg_opt_gen_movi(&ctx, op, op->args[0], 0);
432
+ }
68
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
433
+
69
break;
434
+ /* Swap to little endian for simplicity, then store by bytes. */
70
}
435
+ if ((l.memop & MO_BSWAP) != MO_LE) {
71
436
+ val = bswap64(val);
72
- /* Some of the folding above can change opc. */
437
+ }
73
- opc = op->opc;
438
+ val = do_st_leN(env, &l.page[0], val, l.mmu_idx, ra);
74
- def = &tcg_op_defs[opc];
439
+ (void) do_st_leN(env, &l.page[1], val, l.mmu_idx, ra);
75
- if (def->flags & TCG_OPF_BB_END) {
440
}
76
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
441
77
- } else {
442
void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
78
- int nb_oargs = def->nb_oargs;
443
MemOpIdx oi, uintptr_t retaddr)
79
- for (i = 0; i < nb_oargs; i++) {
444
{
80
- reset_temp(op->args[i]);
445
validate_memop(oi, MO_LEUQ);
81
- /* Save the corresponding known-zero bits mask for the
446
- store_helper(env, addr, val, oi, retaddr, MO_LEUQ);
82
- first output argument (only one supported so far). */
447
+ do_st8_mmu(env, addr, val, oi, retaddr);
83
- if (i == 0) {
448
}
84
- arg_info(op->args[i])->z_mask = z_mask;
449
85
- }
450
void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
86
- }
451
MemOpIdx oi, uintptr_t retaddr)
87
- }
452
{
88
+ finish_folding(&ctx, op);
453
validate_memop(oi, MO_BEUQ);
89
454
- store_helper(env, addr, val, oi, retaddr, MO_BEUQ);
90
/* Eliminate duplicate and redundant fence instructions. */
455
+ do_st8_mmu(env, addr, val, oi, retaddr);
91
if (ctx.prev_mb) {
456
}
457
458
/*
459
* Store Helpers for cpu_ldst.h
460
*/
461
462
-typedef void FullStoreHelper(CPUArchState *env, target_ulong addr,
463
- uint64_t val, MemOpIdx oi, uintptr_t retaddr);
464
-
465
-static inline void cpu_store_helper(CPUArchState *env, target_ulong addr,
466
- uint64_t val, MemOpIdx oi, uintptr_t ra,
467
- FullStoreHelper *full_store)
468
+static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
469
{
470
- full_store(env, addr, val, oi, ra);
471
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
472
}
473
474
void cpu_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
475
MemOpIdx oi, uintptr_t retaddr)
476
{
477
- cpu_store_helper(env, addr, val, oi, retaddr, full_stb_mmu);
478
+ helper_ret_stb_mmu(env, addr, val, oi, retaddr);
479
+ plugin_store_cb(env, addr, oi);
480
}
481
482
void cpu_stw_be_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
483
MemOpIdx oi, uintptr_t retaddr)
484
{
485
- cpu_store_helper(env, addr, val, oi, retaddr, full_be_stw_mmu);
486
+ helper_be_stw_mmu(env, addr, val, oi, retaddr);
487
+ plugin_store_cb(env, addr, oi);
488
}
489
490
void cpu_stl_be_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
491
MemOpIdx oi, uintptr_t retaddr)
492
{
493
- cpu_store_helper(env, addr, val, oi, retaddr, full_be_stl_mmu);
494
+ helper_be_stl_mmu(env, addr, val, oi, retaddr);
495
+ plugin_store_cb(env, addr, oi);
496
}
497
498
void cpu_stq_be_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
499
MemOpIdx oi, uintptr_t retaddr)
500
{
501
- cpu_store_helper(env, addr, val, oi, retaddr, helper_be_stq_mmu);
502
+ helper_be_stq_mmu(env, addr, val, oi, retaddr);
503
+ plugin_store_cb(env, addr, oi);
504
}
505
506
void cpu_stw_le_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
507
MemOpIdx oi, uintptr_t retaddr)
508
{
509
- cpu_store_helper(env, addr, val, oi, retaddr, full_le_stw_mmu);
510
+ helper_le_stw_mmu(env, addr, val, oi, retaddr);
511
+ plugin_store_cb(env, addr, oi);
512
}
513
514
void cpu_stl_le_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
515
MemOpIdx oi, uintptr_t retaddr)
516
{
517
- cpu_store_helper(env, addr, val, oi, retaddr, full_le_stl_mmu);
518
+ helper_le_stl_mmu(env, addr, val, oi, retaddr);
519
+ plugin_store_cb(env, addr, oi);
520
}
521
522
void cpu_stq_le_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
523
MemOpIdx oi, uintptr_t retaddr)
524
{
525
- cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
526
+ helper_le_stq_mmu(env, addr, val, oi, retaddr);
527
+ plugin_store_cb(env, addr, oi);
528
}
529
530
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
92
--
531
--
93
2.25.1
532
2.34.1
94
95
diff view generated by jsdifflib
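The store side mirrors the loads: do_st_leN() consumes a value little-endian-first and returns the bytes that have not been stored yet, so do_st4_mmu()/do_st8_mmu() bswap to little-endian once and then call the same helper for each page. A standalone sketch of that split, with hypothetical page buffers standing in for the per-page host addresses:

    #include <stdint.h>
    #include <stdio.h>

    /* Same shape as do_st_bytes_leN(): store the low bytes, return the remainder. */
    static uint64_t st_bytes_le(uint8_t *haddr, int size, uint64_t val_le)
    {
        for (int i = 0; i < size; i++, val_le >>= 8) {
            haddr[i] = val_le;
        }
        return val_le;
    }

    int main(void)
    {
        uint8_t page0[3], page1[1];
        uint64_t val = 0x11223344;                    /* already little-endian here */
        uint64_t rest = st_bytes_le(page0, 3, val);   /* first page gets 44 33 22 */
        st_bytes_le(page1, 1, rest);                  /* second page gets 11 */
        printf("%02x %02x %02x | %02x\n", page0[0], page0[1], page0[2], page1[0]);
        return 0;
    }

In the patch itself, MMIO and TLB_DISCARD_WRITE pages take the other branches of do_st_leN(), but each branch returns the same "remaining bytes" value, so the second-page call stays correct.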
Deleted patch
1
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
2
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
3
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
4
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
5
---
6
tcg/optimize.c | 9 ++++++---
7
1 file changed, 6 insertions(+), 3 deletions(-)
8
1
9
diff --git a/tcg/optimize.c b/tcg/optimize.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tcg/optimize.c
12
+++ b/tcg/optimize.c
13
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
14
uint64_t z_mask, partmask, affected, tmp;
15
TCGOpcode opc = op->opc;
16
const TCGOpDef *def;
17
+ bool done = false;
18
19
/* Calls are special. */
20
if (opc == INDEX_op_call) {
21
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
22
allocator where needed and possible. Also detect copies. */
23
switch (opc) {
24
CASE_OP_32_64_VEC(mov):
25
- tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
26
- continue;
27
+ done = tcg_opt_gen_mov(&ctx, op, op->args[0], op->args[1]);
28
+ break;
29
30
case INDEX_op_dup_vec:
31
if (arg_is_const(op->args[1])) {
32
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
33
break;
34
}
35
36
- finish_folding(&ctx, op);
37
+ if (!done) {
38
+ finish_folding(&ctx, op);
39
+ }
40
41
/* Eliminate duplicate and redundant fence instructions. */
42
if (ctx.prev_mb) {
43
--
44
2.25.1
45
46
diff view generated by jsdifflib
Deleted patch
1
This puts the separate mb optimization into the same framework
2
as the others. While fold_qemu_{ld,st} are currently identical,
3
that won't last as more code gets moved.
4
1
5
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
6
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
8
---
9
tcg/optimize.c | 89 +++++++++++++++++++++++++++++---------------------
10
1 file changed, 51 insertions(+), 38 deletions(-)
11
12
diff --git a/tcg/optimize.c b/tcg/optimize.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/tcg/optimize.c
15
+++ b/tcg/optimize.c
16
@@ -XXX,XX +XXX,XX @@ static bool fold_call(OptContext *ctx, TCGOp *op)
17
return true;
18
}
19
20
+static bool fold_mb(OptContext *ctx, TCGOp *op)
21
+{
22
+ /* Eliminate duplicate and redundant fence instructions. */
23
+ if (ctx->prev_mb) {
24
+ /*
25
+ * Merge two barriers of the same type into one,
26
+ * or a weaker barrier into a stronger one,
27
+ * or two weaker barriers into a stronger one.
28
+ * mb X; mb Y => mb X|Y
29
+ * mb; strl => mb; st
30
+ * ldaq; mb => ld; mb
31
+ * ldaq; strl => ld; mb; st
32
+ * Other combinations are also merged into a strong
33
+ * barrier. This is stricter than specified but for
34
+ * the purposes of TCG is better than not optimizing.
35
+ */
36
+ ctx->prev_mb->args[0] |= op->args[0];
37
+ tcg_op_remove(ctx->tcg, op);
38
+ } else {
39
+ ctx->prev_mb = op;
40
+ }
41
+ return true;
42
+}
43
+
44
+static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
45
+{
46
+ /* Opcodes that touch guest memory stop the mb optimization. */
47
+ ctx->prev_mb = NULL;
48
+ return false;
49
+}
50
+
51
+static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
52
+{
53
+ /* Opcodes that touch guest memory stop the mb optimization. */
54
+ ctx->prev_mb = NULL;
55
+ return false;
56
+}
57
+
58
/* Propagate constants and copies, fold constant expressions. */
59
void tcg_optimize(TCGContext *s)
60
{
61
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
62
}
63
break;
64
65
+ case INDEX_op_mb:
66
+ done = fold_mb(&ctx, op);
67
+ break;
68
+ case INDEX_op_qemu_ld_i32:
69
+ case INDEX_op_qemu_ld_i64:
70
+ done = fold_qemu_ld(&ctx, op);
71
+ break;
72
+ case INDEX_op_qemu_st_i32:
73
+ case INDEX_op_qemu_st8_i32:
74
+ case INDEX_op_qemu_st_i64:
75
+ done = fold_qemu_st(&ctx, op);
76
+ break;
77
+
78
default:
79
break;
80
}
81
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
82
if (!done) {
83
finish_folding(&ctx, op);
84
}
85
-
86
- /* Eliminate duplicate and redundant fence instructions. */
87
- if (ctx.prev_mb) {
88
- switch (opc) {
89
- case INDEX_op_mb:
90
- /* Merge two barriers of the same type into one,
91
- * or a weaker barrier into a stronger one,
92
- * or two weaker barriers into a stronger one.
93
- * mb X; mb Y => mb X|Y
94
- * mb; strl => mb; st
95
- * ldaq; mb => ld; mb
96
- * ldaq; strl => ld; mb; st
97
- * Other combinations are also merged into a strong
98
- * barrier. This is stricter than specified but for
99
- * the purposes of TCG is better than not optimizing.
100
- */
101
- ctx.prev_mb->args[0] |= op->args[0];
102
- tcg_op_remove(s, op);
103
- break;
104
-
105
- default:
106
- /* Opcodes that end the block stop the optimization. */
107
- if ((def->flags & TCG_OPF_BB_END) == 0) {
108
- break;
109
- }
110
- /* fallthru */
111
- case INDEX_op_qemu_ld_i32:
112
- case INDEX_op_qemu_ld_i64:
113
- case INDEX_op_qemu_st_i32:
114
- case INDEX_op_qemu_st8_i32:
115
- case INDEX_op_qemu_st_i64:
116
- /* Opcodes that touch guest memory stop the optimization. */
117
- ctx.prev_mb = NULL;
118
- break;
119
- }
120
- } else if (opc == INDEX_op_mb) {
121
- ctx.prev_mb = op;
122
- }
123
}
124
}
125
--
126
2.25.1
127
128
diff view generated by jsdifflib
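The merge rule in fold_mb() above ("mb X; mb Y => mb X|Y") amounts to OR-ing the pending barrier's argument into the previous one. A toy sketch of just that rule; the flag values are invented stand-ins, not the real TCG_MO_*/TCG_BAR_* definitions:

    #include <stdio.h>

    /* Invented stand-ins for TCG memory-ordering flags. */
    enum { BAR_LD_LD = 1, BAR_LD_ST = 2, BAR_ST_LD = 4, BAR_ST_ST = 8 };

    /* Two adjacent barriers collapse into one covering both sets of orderings. */
    static int merge_barriers(int prev_args, int cur_args)
    {
        return prev_args | cur_args;
    }

    int main(void)
    {
        int acquire = BAR_LD_LD | BAR_LD_ST;    /* ldaq-style ordering */
        int release = BAR_LD_ST | BAR_ST_ST;    /* strl-style ordering */
        printf("merged mask: %#x\n", merge_barriers(acquire, release));  /* 0xb */
        return 0;
    }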
1
Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>
1
This header is supposed to be private to tcg and in fact
2
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
2
does not need to be included here at all.
3
4
Reviewed-by: Song Gao <gaosong@loongson.cn>
5
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
6
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
4
---
7
---
5
tcg/optimize.c | 33 +++++++++++++++++++--------------
8
target/loongarch/csr_helper.c | 1 -
6
1 file changed, 19 insertions(+), 14 deletions(-)
9
target/loongarch/iocsr_helper.c | 1 -
10
2 files changed, 2 deletions(-)
7
11
8
diff --git a/tcg/optimize.c b/tcg/optimize.c
12
diff --git a/target/loongarch/csr_helper.c b/target/loongarch/csr_helper.c
9
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
10
--- a/tcg/optimize.c
14
--- a/target/loongarch/csr_helper.c
11
+++ b/tcg/optimize.c
15
+++ b/target/loongarch/csr_helper.c
12
@@ -XXX,XX +XXX,XX @@ static bool fold_andc(OptContext *ctx, TCGOp *op)
16
@@ -XXX,XX +XXX,XX @@
13
return fold_const2(ctx, op);
17
#include "exec/cpu_ldst.h"
14
}
18
#include "hw/irq.h"
15
19
#include "cpu-csr.h"
16
+static bool fold_brcond(OptContext *ctx, TCGOp *op)
20
-#include "tcg/tcg-ldst.h"
17
+{
21
18
+ TCGCond cond = op->args[2];
22
target_ulong helper_csrrd_pgd(CPULoongArchState *env)
19
+ int i = do_constant_folding_cond(op->opc, op->args[0], op->args[1], cond);
20
+
21
+ if (i == 0) {
22
+ tcg_op_remove(ctx->tcg, op);
23
+ return true;
24
+ }
25
+ if (i > 0) {
26
+ op->opc = INDEX_op_br;
27
+ op->args[0] = op->args[3];
28
+ }
29
+ return false;
30
+}
31
+
32
static bool fold_brcond2(OptContext *ctx, TCGOp *op)
33
{
23
{
34
TCGCond cond = op->args[4];
24
diff --git a/target/loongarch/iocsr_helper.c b/target/loongarch/iocsr_helper.c
35
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
25
index XXXXXXX..XXXXXXX 100644
36
}
26
--- a/target/loongarch/iocsr_helper.c
37
break;
27
+++ b/target/loongarch/iocsr_helper.c
38
28
@@ -XXX,XX +XXX,XX @@
39
- CASE_OP_32_64(brcond):
29
#include "exec/helper-proto.h"
40
- i = do_constant_folding_cond(opc, op->args[0],
30
#include "exec/exec-all.h"
41
- op->args[1], op->args[2]);
31
#include "exec/cpu_ldst.h"
42
- if (i == 0) {
32
-#include "tcg/tcg-ldst.h"
43
- tcg_op_remove(s, op);
33
44
- continue;
34
#define GET_MEMTXATTRS(cas) \
45
- } else if (i > 0) {
35
((MemTxAttrs){.requester_id = env_cpu(cas)->cpu_index})
46
- memset(&ctx.temps_used, 0, sizeof(ctx.temps_used));
47
- op->opc = opc = INDEX_op_br;
48
- op->args[0] = op->args[3];
49
- break;
50
- }
51
- break;
52
-
53
CASE_OP_32_64(movcond):
54
i = do_constant_folding_cond(opc, op->args[1],
55
op->args[2], op->args[5]);
56
@@ -XXX,XX +XXX,XX @@ void tcg_optimize(TCGContext *s)
57
CASE_OP_32_64_VEC(andc):
58
done = fold_andc(&ctx, op);
59
break;
60
+ CASE_OP_32_64(brcond):
61
+ done = fold_brcond(&ctx, op);
62
+ break;
63
case INDEX_op_brcond2_i32:
64
done = fold_brcond2(&ctx, op);
65
break;
66
--
36
--
67
2.25.1
37
2.34.1
68
69
diff view generated by jsdifflib
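A toy standalone version of the decision made in fold_brcond() above: when the comparison folds to a constant, a never-taken branch is dropped and an always-taken one becomes a plain br. The Op type, names, and the harness value are invented for illustration; only the 0 = "known false", > 0 = "known true" convention is taken from do_constant_folding_cond():

    #include <stdio.h>

    typedef enum { OP_BRCOND, OP_BR, OP_REMOVED } OpKind;
    typedef struct { OpKind kind; int target; } Op;

    /* folded < 0: not constant, leave alone; 0: never taken; > 0: always taken. */
    static void fold_brcond_toy(Op *op, int folded)
    {
        if (folded == 0) {
            op->kind = OP_REMOVED;   /* drop the branch entirely */
        } else if (folded > 0) {
            op->kind = OP_BR;        /* rewrite as an unconditional branch */
        }
    }

    int main(void)
    {
        Op op = { OP_BRCOND, 7 };
        fold_brcond_toy(&op, 1);
        printf("kind=%d target=%d\n", op.kind, op.target);  /* kind=1 (OP_BR), target=7 */
        return 0;
    }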